//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to estimate the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
- Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
+ Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
+ /// Holds the extended (to the widest induction type) start index.
+ Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
};
+/// \brief Check if conditionally executed loads are hoistable.
+///
+/// This class has two functions. isHoistableLoad and canHoistAllLoads.
+/// isHoistableLoad should be called on all load instructions that are executed
+/// conditionally. After all conditional loads are processed, the client should
+/// call canHoistAllLoads to determine if all of the conditional execute loads
+/// have an unconditional memory access in the loop.
+class LoadHoisting {
+ typedef SmallPtrSet<Value *, 8> MemorySet;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ MemorySet CondLoadAddrSet;
+
+public:
+ LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
+
+ /// \brief Check if the instruction is a load with a identifiable address.
+ bool isHoistableLoad(Instruction *L);
+
+ /// \brief Check if all of the conditional loads are hoistable because there
+ /// exists an unconditional memory access to the same address in the loop.
+ bool canHoistAllLoads();
+};
+
+bool LoadHoisting::isHoistableLoad(Instruction *L) {
+ LoadInst *LI = dyn_cast<LoadInst>(L);
+ if (!LI)
+ return false;
+
+ CondLoadAddrSet.insert(LI->getPointerOperand());
+ return true;
+}
+
+static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ Value *Addr = 0;
+
+ // Try a load.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ if (LI) {
+ Addr = LI->getPointerOperand();
+ Set.insert(Addr);
+ continue;
+ }
+
+ // Try a store.
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ if (!SI)
+ continue;
+
+ Addr = SI->getPointerOperand();
+ Set.insert(Addr);
+ }
+}
+
+bool LoadHoisting::canHoistAllLoads() {
+ // No conditional loads.
+ if (CondLoadAddrSet.empty())
+ return true;
+
+ MemorySet UncondMemAccesses;
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+
+ // Iterate over the unconditional blocks and collect memory access addresses.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // Ignore conditional blocks.
+ if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
+ continue;
+
+ addMemAccesses(BB, UncondMemAccesses);
+ }
+
+ // And make sure there is a matching unconditional access for every
+ // conditional load.
+ for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
+ ME = CondLoadAddrSet.end(); MI != ME; ++MI)
+ if (!UncondMemAccesses.count(*MI))
+ return false;
+
+ return true;
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult ///< Product of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
/// This enum represents the kinds of inductions that we support.
MRK_UIntMin,
MRK_UIntMax,
MRK_SIntMin,
- MRK_SIntMax
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
};
/// This POD struct holds information about reduction variables.
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Returns the widest induction type.
+ Type *getWidestInductionType() { return WidestIndTy; }
+
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
/// This function returns the identity element (or neutral element) for
/// the operation K.
- static Constant *getReductionIdentity(ReductionKind K, Type *Tp,
- MinMaxReductionKind MinMaxK);
+ static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
+ /// Holds the widest induction type encountered.
+ Type *WidestIndTy;
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr;
+
+ /// Utility to determine whether loads can be speculated.
+ LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
return Shuf;
}
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) {
- int Idx = Negate ? (-i): i;
- Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+ int64_t Idx = Negate ? (-i) : i;
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
// Add the consecutive indices to the vector value.
// Mark the old scalar loop with metadata that tells us not to vectorize this
// loop again if we run into it.
- MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
// Some loops have a single integer induction variable, while other loops
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
OldInduction = Legal->getInduction();
- Type *IdxTy = OldInduction ? OldInduction->getType() :
- DL->getIntPtrType(SE->getContext());
+ Type *IdxTy = Legal->getWidestInductionType();
// Find the loop boundaries.
const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
- Value *StartIdx = OldInduction ?
- OldInduction->getIncomingValueForBlock(BypassBlock):
- ConstantInt::get(IdxTy, 0);
+ Builder.SetInsertPoint(BypassBlock->getTerminator());
+ Value *StartIdx = ExtendedIdx = OldInduction ?
+ Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
+ IdxTy):
+ ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
LoopBypassBlocks.push_back(BypassBlock);
PHINode *ResumeIndex = 0;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ // Set builder to point to last bypass block.
+ BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
LoopVectorizationLegality::InductionInfo II = I->second;
- PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+
+ Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
+ PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
MiddleBlock->getTerminator());
+ // We might have extended the type of the induction variable but we need a
+ // truncated version for the scalar loop.
+ PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
+ PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
+ MiddleBlock->getTerminator()) : 0;
+
Value *EndValue = 0;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter:
+ // Handle the integer induction counter.
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
- assert(OrigPhi == OldInduction && "Unknown integer PHI");
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
+
+ // We have the canonical induction variable.
+ if (OrigPhi == OldInduction) {
+ // Create a truncated version of the resume value for the scalar loop,
+ // we might have promoted the type to a larger width.
+ EndValue =
+ BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ TruncResumeVal->addIncoming(EndValue, VecBody);
+
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
+ }
+
+ // Not the canonical induction variable - add the vector loop count to the
+ // start value.
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
- unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
- unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
- Value *CRD = CountRoundDown;
- if (CRDSize > IISize)
- CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
- II.StartValue->getType(), "tr.crd",
- LoopBypassBlocks.back()->getTerminator());
- else if (CRDSize < IISize)
- CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
- II.StartValue->getType(),
- "sext.crd",
- LoopBypassBlocks.back()->getTerminator());
- // Handle reverse integer induction counter:
- EndValue =
- BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ // Handle reverse integer induction counter.
+ EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
- EndValue =
- GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
+ "ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
- Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
- "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
- EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
- "rev.ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
+ "rev.ind.end");
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
+ "rev.ptr.ind.end");
break;
}
}// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ if (OrigPhi == OldInduction)
+ ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ else
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ }
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ // The old inductions phi node in the scalar body needs the truncated value.
+ if (OrigPhi == OldInduction)
+ OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
+ else
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
// If we are generating a new induction variable then we also need to
/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant*
-LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp,
- MinMaxReductionKind MinMaxK) {
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
case RK_IntegerXor:
case RK_IntegerAdd:
case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
- case RK_IntegerMinMax:
- switch(MinMaxK) {
- default: llvm_unreachable("Unknown min/max predicate");
- case MRK_UIntMin:
- return ConstantInt::getAllOnesValue(Tp);
- case MRK_UIntMax:
- return ConstantInt::get(Tp, 0);
- case MRK_SIntMin: {
- unsigned BitWidth = Tp->getPrimitiveSizeInBits();
- return ConstantInt::get(Tp->getContext(),
- APInt::getSignedMaxValue(BitWidth));
- }
- case LoopVectorizationLegality::MRK_SIntMax: {
- unsigned BitWidth = Tp->getPrimitiveSizeInBits();
- return ConstantInt::get(Tp->getContext(),
- APInt::getSignedMinValue(BitWidth));
- }
- }
default:
llvm_unreachable("Unknown reduction kind");
}
return Instruction::FAdd;
case LoopVectorizationLegality::RK_IntegerMinMax:
return Instruction::ICmp;
+ case LoopVectorizationLegality::RK_FloatMinMax:
+ return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
break;
case LoopVectorizationLegality::MRK_SIntMax:
P = CmpInst::ICMP_SGT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
}
- Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Cmp;
+ if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
- Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType(),
- RdxDesc.MinMaxKind);
- Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ Value *Identity;
+ Value *VectorStart;
+ if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+ RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ // MinMax reduction have the start value as their identify.
+ VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+ "minmax.ident");
+ } else {
+ Constant *Iden =
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
// Fix the vector-loop phi.
// We created the induction variable so we know that the
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
- if (Op != Instruction::ICmp)
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
RdxParts[part], ReducedPartRdx,
"bin.rdx");
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- if (Op != Instruction::ICmp)
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
else
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
-
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
- P->getParent());
- for (unsigned part = 0; part < UF; ++part) {
- VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
- VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
- "predphi");
+ unsigned NumIncoming = P->getNumIncomingValues();
+ assert(NumIncoming > 1 && "Invalid PHI");
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We don't need to 'select' the first PHI operand because it is
+ // the default value if all of the other masks don't match.
+ if (In == 0)
+ Entry[part] = In0[part];
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
+ }
}
continue;
}
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- assert(P == OldInduction && "Unexpected PHI");
- Value *Broadcasted = getBroadcastInstrs(Induction);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding 0, 1, 2 ...
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
continue;
case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
- Value *StartIdx = 0;
- // If we have a single integer induction variable then use it.
- // Otherwise, start counting at zero.
- if (OldInduction) {
- LoopVectorizationLegality::InductionInfo OldII =
- Legal->getInductionVars()->lookup(OldInduction);
- StartIdx = OldII.StartValue;
- } else {
- StartIdx = ConstantInt::get(Induction->getType(), 0);
- }
+ Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
continue;
}
if (!isa<BranchInst>(BB->getTerminator()))
return false;
- // We must have at most two predecessors because we need to convert
- // all PHIs to selects.
- unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
- if (Preds > 2)
- return false;
-
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
return false;
return true;
}
+static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+ if (Ty->isPointerTy())
+ return DL.getIntPtrType(Ty->getContext());
+ return Ty;
+}
+
+static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+ Ty0 = convertPointerToIntegerType(DL, Ty0);
+ Ty1 = convertPointerToIntegerType(DL, Ty1);
+ if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+ return Ty0;
+ return Ty1;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
return false;
}
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr = F.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex,
+ "no-nans-fp-math").getValueAsString() == "true";
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
+ Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
- if (!Phi->getType()->isIntegerTy() &&
- !Phi->getType()->isFloatingPointTy() &&
- !Phi->getType()->isPointerTy()) {
+ if (!PhiTy->isIntegerTy() &&
+ !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
if (*bb != Header)
continue;
+ // We only allow if-converted PHIs with more than two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
// Check if this is an induction variable.
InductionKind IK = isInductionVariable(Phi);
if (IK_NoInduction != IK) {
+ // Get the widest type.
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
- if (Induction) {
- DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
- return false;
- }
- Induction = Phi;
+ // Use the phi node with the widest type as induction. Use the last
+ // one if there are multiple (no good reason for doing this other
+ // than it is expedient).
+ if (!Induction || PhiTy == WidestIndTy)
+ Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_FloatMinMax)) {
+ DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- assert(getInductionVars()->size() && "No induction variables");
+ if (Inductions.empty())
+ return false;
}
return true;
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
return false;
}
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
return false;
}
}
return true;
}
+static bool hasMultipleUsesOf(Instruction *I,
+ SmallPtrSet<Instruction *, 8> &Insts) {
+ unsigned NumUses = 0;
+ for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
+ }
+
+ return false;
+}
+
+static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
+ for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
ReductionKind Kind) {
if (Phi->getNumIncomingValues() != 2)
// This includes users of the reduction, variables (which form a cycle
// which ends in the phi node).
Instruction *ExitInstruction = 0;
- // Indicates that we found a binary operation in our scan.
- bool FoundBinOp = false;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
- // Iter is our iterator. We start with the PHI node and scan for all of the
- // users of this instruction. All users must be instructions that can be
- // used as reduction variables (such as ADD). We may have a single
- // out-of-block user. The cycle must end with the original PHI.
- Instruction *Iter = Phi;
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
// To recognize min/max patterns formed by a icmp select sequence, we store
// the number of instruction we saw from the recognized min/max pattern,
- // such that we don't stop when we see the phi has two uses (one by the select
- // and one by the icmp) and to make sure we only see exactly the two
- // instructions.
- unsigned NumICmpSelectPatternInst = 0;
+ // to make sure we only see exactly the two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
ReductionInstDesc ReduxDesc(false, 0);
- // Avoid cycles in the chain.
SmallPtrSet<Instruction *, 8> VisitedInsts;
- while (VisitedInsts.insert(Iter)) {
- // If the instruction has no users then this is a broken
- // chain and can't be a reduction variable.
- if (Iter->use_empty())
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By one instruction outside of the loop (safe).
+ // - By further instructions outside of the loop (not safe).
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
return false;
- // Did we find a user inside this loop already ?
- bool FoundInBlockUser = false;
- // Did we reach the initial PHI node already ?
- bool FoundStartPHI = false;
+ bool IsAPhi = isa<PHINode>(Cur);
- // Is this a bin op ?
- FoundBinOp |= !isa<PHINode>(Iter);
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
- // For each of the *users* of iter.
- for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
- it != e; ++it) {
- Instruction *U = cast<Instruction>(*it);
- // We already know that the PHI is a user.
- if (U == Phi) {
- FoundStartPHI = true;
- continue;
- }
+ // Reductions of instructions such as Div, and Sub is only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds.
+ ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if(IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi;
+
+ // Process users of current instruction. Push non PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (Value::use_iterator UI = Cur->use_begin(), E = Cur->use_end(); UI != E;
+ ++UI) {
+ Instruction *Usr = cast<Instruction>(*UI);
// Check if we found the exit user.
- BasicBlock *Parent = U->getParent();
+ BasicBlock *Parent = Usr->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users.
if (ExitInstruction != 0)
return false;
- ExitInstruction = Iter;
- }
-
- // We allow in-loop PHINodes which are not the original reduction PHI
- // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
- // structure) then don't skip this PHI.
- if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
- U->getParent() != TheLoop->getHeader() &&
- TheLoop->contains(U) &&
- Iter->hasNUsesOrMore(2))
+ ExitInstruction = Cur;
continue;
+ }
- // We can't have multiple inside users except for a combination of
- // icmp/select both using the phi.
- if (FoundInBlockUser && !NumICmpSelectPatternInst)
- return false;
- FoundInBlockUser = true;
-
- // Any reduction instr must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
- if (!ReduxDesc.IsReduction)
- return false;
+ // Process instructions only once (termination).
+ if (VisitedInsts.insert(Usr)) {
+ if (isa<PHINode>(Usr))
+ PHIs.push_back(Usr);
+ else
+ NonPHIs.push_back(Usr);
+ }
+ // Remember that we completed the cycle.
+ if (Usr == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
- if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
- isa<SelectInst>(U)))
- ++NumICmpSelectPatternInst;
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
- // Reductions of instructions such as Div, and Sub is only
- // possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
- !isa<ICmpInst>(U) && U->getOperand(0) != Iter)
- return false;
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
- Iter = ReduxDesc.PatternLastInst;
- }
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
- return false;
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
- // We found a reduction var if we have reached the original
- // phi node and we only have a single instruction with out-of-loop
- // users.
- if (FoundStartPHI) {
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
-
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.MinMaxKind);
- Reductions[Phi] = RD;
- // We've ended the cycle. This is a reduction variable if we have an
- // outside user and it has a binary op.
- return FoundBinOp && ExitInstruction;
- }
- }
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
+ Reductions[Phi] = RD;
+ // We've ended the cycle. This is a reduction variable if we have an
+ // outside user and it has a binary op.
- return false;
+ return true;
}
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) {
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
- assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
- ICmpInst *Cmp = 0;
+ Instruction *Cmp = 0;
SelectInst *Select = 0;
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
- if ((Cmp = dyn_cast<ICmpInst>(I))) {
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
return ReductionInstDesc(false, I);
return ReductionInstDesc(Select, Prev.MinMaxKind);
// Only handle single use cases for now.
if (!(Select = dyn_cast<SelectInst>(I)))
return ReductionInstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
return ReductionInstDesc(false, I);
if (!Cmp->hasOneUse())
return ReductionInstDesc(false, I);
- Value *CmpLeft = Cmp->getOperand(0);
- Value *CmpRight = Cmp->getOperand(1);
+ Value *CmpLeft;
+ Value *CmpRight;
// Look for a min/max pattern.
if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_SIntMax);
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
return ReductionInstDesc(false, I);
}
default:
return ReductionInstDesc(false, I);
case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+ Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
- if (Kind != RK_IntegerMinMax)
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return isMinMaxSelectCmpPattern(I, Prev);
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
+ return false;
+
+ // We predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
}
}
+ // Check that we can actually speculate the hoistable loads.
+ if (!LoadSpeculation.canHoistAllLoads())
+ return false;
+
return true;
}