#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
+ DominatorTree *DT, TargetTransformInfo* TTI,
+ AliasAnalysis *AA, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
- MaxSafeDepDistBytes(-1U), LoadSpeculation(L, DT) {}
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId);
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
/// This flag indicates if we need to add the runtime check.
bool Need;
SmallVector<const SCEV*, 2> Ends;
/// Holds the information if this pointer is used for writing to memory.
SmallVector<bool, 2> IsWritePtr;
- /// Holds the id of the set of pointers that could be dependent because of a
- /// shared underlying object.
- SmallVector<unsigned, 2> DependencySetId;
};
/// A POD for saving information about induction variables.
/// induction descriptor.
typedef MapVector<PHINode*, InductionInfo> InductionList;
+ /// Alias(Multi)Map stores the values (GEPs or underlying objects) and their
+ /// respective Store/Load instruction(s) to calculate aliasing.
+ typedef MapVector<Value*, Instruction* > AliasMap;
+ typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
/// This function returns the identity element (or neutral element) for
/// the operation K.
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
-
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
-
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
+ /// Return true if we can compute the address bounds of Ptr within the loop.
+ bool hasComputableBounds(Value *Ptr);
+ /// Return true if there is a chance of a write reorder.
+ bool hasPossibleGlobalWriteReorder(Value *Object,
+ Instruction *Inst,
+ AliasMultiMap &WriteObjects,
+ unsigned MaxByteWidth);
+ /// Return the AA location for a load or a store.
+ AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
+
/// The loop that we evaluate.
Loop *TheLoop;
DataLayout *DL;
/// Dominators.
DominatorTree *DT;
+ /// Target Info.
+ TargetTransformInfo *TTI;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
- unsigned MaxSafeDepDistBytes;
-
/// Utility to determine whether loads can be speculated.
LoadHoisting LoadSpeculation;
};
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
+ AliasAnalysis *AA;
TargetLibraryInfo *TLI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
if (DL == NULL) {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Loop *Lp, Value *Ptr,
- bool WritePtr,
- unsigned DepSetId) {
+ bool WritePtr) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
if (!PtrRtCheck->Need)
return NULL;
+ Instruction *MemoryRuntimeCheck = 0;
unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
SCEVExpander Exp(*SE, "induction");
}
IRBuilder<> ChkBuilder(Loc);
- // Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = 0;
+
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
// No need to check if two readonly pointers intersect.
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
continue;
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
- continue;
-
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
if (MemoryRuntimeCheck)
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
- MemoryRuntimeCheck = IsConflict;
+
+ MemoryRuntimeCheck = cast<Instruction>(IsConflict);
}
}
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- LLVMContext &Ctx = Loc->getContext();
- Instruction * Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- return Check;
+ return MemoryRuntimeCheck;
}
void
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
+ //RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n");
} else {
return true;
}
+/// Return the location that alias analysis reports for \p Inst.
+/// \p Inst must be a store or a load; any other instruction kind is a
+/// programming error and hits llvm_unreachable.
+AliasAnalysis::Location
+LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
+  StoreInst *AsStore = dyn_cast<StoreInst>(Inst);
+  if (AsStore)
+    return AA->getLocation(AsStore);
+
+  LoadInst *AsLoad = dyn_cast<LoadInst>(Inst);
+  if (AsLoad)
+    return AA->getLocation(AsLoad);
+
+  llvm_unreachable("Should be either load or store instruction");
+}
+
+/// Return true if \p Inst may alias any *other* instruction recorded in
+/// \p WriteObjects under the key \p Object, with both locations resized to
+/// \p MaxByteWidth bytes before the alias query.
+bool
+LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
+    Value *Object,
+    Instruction *Inst,
+    AliasMultiMap& WriteObjects,
+    unsigned MaxByteWidth) {
+
+  // The resized query location is the same for every comparison below.
+  const AliasAnalysis::Location Query =
+      getLoadStoreLocation(Inst).getWithNewSize(MaxByteWidth);
+
+  std::vector<Instruction*> &Recorded = WriteObjects[Object];
+  for (size_t Idx = 0, Num = Recorded.size(); Idx != Num; ++Idx) {
+    Instruction *Other = Recorded[Idx];
+    // An access is never compared against itself.
+    if (Other == Inst)
+      continue;
+
+    const AliasAnalysis::Location Candidate =
+        getLoadStoreLocation(Other).getWithNewSize(MaxByteWidth);
+    if (AA->alias(Query, Candidate))
+      return true;
+  }
+  return false;
+}
+
bool LoopVectorizationLegality::canVectorizeMemory() {
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
-
- // Stores a pair of memory access location and whether the access is a store
- // (true) or a load (false).
- typedef std::pair<Value*, char> MemAccessInfo;
- typedef DenseSet<MemAccessInfo> PtrAccessSet;
-
// Holds the Load and Store *instructions*.
ValueVector Loads;
ValueVector Stores;
-
- // Holds all the different accesses in the loop.
- unsigned NumReads = 0;
- unsigned NumReadWrites = 0;
-
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
- MemoryDepChecker DepChecker(SE, DL, TheLoop);
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
return false;
}
Loads.push_back(Ld);
- DepChecker.addAccess(Ld);
continue;
}
return false;
}
Stores.push_back(St);
- DepChecker.addAccess(St);
}
} // next instr.
} // next block.
return true;
}
- AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, DependentAccesses);
+ // Holds the read and read-write *pointers* that we find. These maps hold
+ // unique values for pointers (so no need for multi-map).
+ AliasMap Reads;
+ AliasMap ReadWrites;
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
return false;
}
- // If we did *not* see this pointer before, insert it to the read-write
- // list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr)) {
- ++NumReadWrites;
- Accesses.addStore(Ptr);
- }
+ // If we did *not* see this pointer before, insert it to
+ // the read-write list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr))
+ ReadWrites.insert(std::make_pair(Ptr, ST));
}
if (IsAnnotatedParallel) {
return true;
}
- SmallPtrSet<Value *, 16> ReadOnlyPtr;
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
// If the address of i is unknown (for example A[B[i]]) then we may
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
- bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop)) {
- ++NumReads;
- IsReadOnlyPtr = true;
- }
- Accesses.addLoad(Ptr, IsReadOnlyPtr);
+ if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
+ Reads.insert(std::make_pair(Ptr, LD));
}
// If we write (or read-write) to a single destination and there are no
// other reads in this loop then is it safe to vectorize.
- if (NumReadWrites == 1 && NumReads == 0) {
+ if (ReadWrites.size() == 1 && Reads.size() == 0) {
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
return true;
}
- // Build dependence sets and check whether we need a runtime pointer bounds
- // check.
- Accesses.buildDependenceSets();
- bool NeedRTCheck = Accesses.isRTCheckNeeded();
+ unsigned NumReadPtrs = 0;
+ unsigned NumWritePtrs = 0;
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- unsigned NumComparisons = 0;
- bool CanDoRT = false;
- if (NeedRTCheck)
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
-
-
- DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
- " pointer comparisons.\n");
-
- // If we only have one set of dependences to check pointers among we don't
- // need a runtime check.
- if (NumComparisons == 0 && NeedRTCheck)
- NeedRTCheck = false;
+ bool CanDoRT = true;
+ AliasMap::iterator MI, ME;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V, true);
+ NumWritePtrs++;
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
+ } else {
+ CanDoRT = false;
+ break;
+ }
+ }
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V, false);
+ NumReadPtrs++;
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
+ } else {
+ CanDoRT = false;
+ break;
+ }
+ }
- // Check that we did not collect too many pointers or found a unsizeable
- // pointer.
+ // Check that we did not collect too many pointers or found a
+ // unsizeable pointer.
+ unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
+ DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
}
+ bool NeedRTCheck = false;
+
+ // Biggest vectorized access possible, vector width * unroll factor.
+ // TODO: We're being very pessimistic here, find a way to know the
+ // real access width before getting here.
+ unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
+ TTI->getMaximumUnrollFactor();
+ // Now that the pointers are in two lists (Reads and ReadWrites), we
+ // can check that there are no conflicts among the writes, nor between
+ // the writes and the reads.
+ // Note that WriteObjects duplicates the stores (indexed now by underlying
+ // objects) to avoid pointing to elements inside ReadWrites.
+ // TODO: Maybe create a new type where they can interact without duplication.
+ AliasMultiMap WriteObjects;
+ ValueVector TempObjects;
+
+ // Check that the read-writes do not conflict with other read-write
+ // pointers.
+ bool AllWritesIdentified = true;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ Instruction *Inst = (*MI).second;
+
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ if (!isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
+ AllWritesIdentified = false;
+ }
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty()) {
+ DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
+ WriteObjects[*UI].push_back(Inst);
+ continue;
+ }
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a conflicting global value:"
+ << **UI <<"\n");
+ DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
+ return false;
+ }
+
+ // Didn't alias, insert into map for further reference.
+ WriteObjects[*UI].push_back(Inst);
+ }
+ TempObjects.clear();
+ }
+
+ /// Check that the reads don't conflict with the read-writes.
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ // If all of the writes are identified then we don't care if the read
+ // pointer is identified or not.
+ if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
+ }
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty())
+ continue;
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a global value: "
+ << **UI <<"\n");
+ Instruction *Inst = (*MI).second;
+ DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
+ return false;
+ }
+ }
+ TempObjects.clear();
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
if (NeedRTCheck && !CanDoRT) {
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
return false;
}
- PtrRtCheck.Need = NeedRTCheck;
-
- bool CanVecMem = true;
- if (Accesses.isDependencyCheckNeeded()) {
- DEBUG(dbgs() << "LV: Checking memory dependencies\n");
- CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
- Accesses.getDependenciesToCheck());
- MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
- }
-
DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
" need a runtime memory check.\n");
-
- return CanVecMem;
+ return true;
}
static bool hasMultipleUsesOf(Instruction *I,
return true;
}
+/// Return true if the scalar-evolution expression of \p Ptr is an affine
+/// add-recurrence; false if it is not an add-recurrence at all or is not
+/// affine.
+bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+  return AddRec && AddRec->isAffine();
+}
+
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned UserVF) {
unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
- unsigned MaxSafeDepDist = -1U;
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
- WidestRegister = WidestRegister < MaxSafeDepDist ? WidestRegister : MaxSafeDepDist;
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
if (OptForSize)
return 1;
- // We used the distance for the unroll factor.
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- return 1;
-
// Do not unroll loops with a relatively small trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop,
TheLoop->getLoopLatch());
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+++ /dev/null
-; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-; Vectorization with dependence checks.
-
-; No plausible dependence - can be vectorized.
-; for (i = 0; i < 1024; ++i)
-; A[i] = A[i + 1] + 1;
-
-; CHECK: f1_vec
-; CHECK: <2 x i32>
-
-define void @f1_vec(i32* %A) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %indvars.iv.next = add i32 %indvars.iv, 1
- %arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv.next
- %0 = load i32* %arrayidx, align 4
- %add1 = add nsw i32 %0, 1
- %arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv
- store i32 %add1, i32* %arrayidx3, align 4
- %exitcond = icmp ne i32 %indvars.iv.next, 1024
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret void
-}
-
-; Plausible dependence of distance 1 - can't be vectorized.
-; for (i = 0; i < 1024; ++i)
-; A[i+1] = A[i] + 1;
-
-; CHECK: f2_novec
-; CHECK-NOT: <2 x i32>
-
-define void @f2_novec(i32* %A) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv
- %0 = load i32* %arrayidx, align 4
- %add = add nsw i32 %0, 1
- %indvars.iv.next = add i32 %indvars.iv, 1
- %arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv.next
- store i32 %add, i32* %arrayidx3, align 4
- %exitcond = icmp ne i32 %indvars.iv.next, 1024
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret void
-}
-
-; Plausible dependence of distance 2 - can be vectorized with a width of 2.
-; for (i = 0; i < 1024; ++i)
-; A[i+2] = A[i] + 1;
-
-; CHECK: f3_vec_len
-; CHECK: <2 x i32>
-
-; WIDTH: f3_vec_len
-; WIDTH-NOT: <4 x i32>
-
-define void @f3_vec_len(i32* %A) {
-entry:
- br label %for.body
-
-for.body:
- %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %idxprom = sext i32 %i.01 to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- %0 = load i32* %arrayidx, align 4
- %add = add nsw i32 %0, 1
- %add1 = add nsw i32 %i.01, 2
- %idxprom2 = sext i32 %add1 to i64
- %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
- store i32 %add, i32* %arrayidx3, align 4
- %inc = add nsw i32 %i.01, 1
- %cmp = icmp slt i32 %inc, 1024
- br i1 %cmp, label %for.body, label %for.end
-
-for.end:
- ret void
-}
-
-; Plausible dependence of distance 1 - cannot be vectorized (without reordering
-; accesses).
-; for (i = 0; i < 1024; ++i) {
-; B[i] = A[i];
-; A[i] = B[i + 1];
-; }
-
-; CHECK: f5
-; CHECK-NOT: <2 x i32>
-
-define void @f5(i32* %A, i32* %B) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
- store i32 %0, i32* %arrayidx2, align 4
- %indvars.iv.next = add nsw i64 %indvars.iv, 1
- %arrayidx4 = getelementptr inbounds i32* %B, i64 %indvars.iv.next
- %1 = load i32* %arrayidx4, align 4
- store i32 %1, i32* %arrayidx, align 4
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 1024
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret void
-}
-
-; Dependence through a phi node - must not vectorize.
-; for (i = 0; i < 1024; ++i) {
-; a[i+1] = tmp;
-; tmp = a[i];
-; }
-
-; CHECK: f6
-; CHECK-NOT: <2 x i32>
-
-define i32 @f6(i32* %a, i32 %tmp) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]
- %indvars.iv.next = add nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv.next
- store i32 %tmp.addr.08, i32* %arrayidx, align 4
- %arrayidx3 = getelementptr inbounds i32* %a, i64 %indvars.iv
- %0 = load i32* %arrayidx3, align 4
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 1024
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret i32 undef
-}
-
-; Don't vectorize true loop carried dependencies that are not a multiple of the
-; vector width.
-; Example:
-; for (int i = ...; ++i) {
-; a[i] = a[i-3] + ...;
-; It is a bad idea to vectorize this loop because store-load forwarding will not
-; happen.
-;
-
-; CHECK: @nostoreloadforward
-; CHECK-NOT: <2 x i32>
-
-define void @nostoreloadforward(i32* %A) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
- %0 = add nsw i64 %indvars.iv, -3
- %arrayidx = getelementptr inbounds i32* %A, i64 %0
- %1 = load i32* %arrayidx, align 4
- %2 = add nsw i64 %indvars.iv, 4
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %2
- %3 = load i32* %arrayidx2, align 4
- %add3 = add nsw i32 %3, %1
- %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv
- store i32 %add3, i32* %arrayidx5, align 4
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 128
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret void
-}
-
-; Example:
-; for (int i = ...; ++i) {
-; a[i] = b[i];
-; c[i] = a[i-3] + ...;
-; It is a bad idea to vectorize this loop because store-load forwarding will not
-; happen.
-;
-
-; CHECK: @nostoreloadforward2
-; CHECK-NOT: <2 x i32>
-
-define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {
-entry:
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
- store i32 %0, i32* %arrayidx2, align 4
- %1 = add nsw i64 %indvars.iv, -3
- %arrayidx4 = getelementptr inbounds i32* %A, i64 %1
- %2 = load i32* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32* %C, i64 %indvars.iv
- store i32 %2, i32* %arrayidx6, align 4
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 128
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end:
- ret void
-}