/// This struct holds information about the memory runtime legality check that
/// a group of pointers do not overlap.
struct RuntimePointerCheck {
- RuntimePointerCheck() : Need(false) {}
+ RuntimePointerCheck(ScalarEvolution *SE) : Need(false), SE(SE) {}
/// Reset the state of the pointer runtime information.
void reset() {
IsWritePtr.clear();
DependencySetId.clear();
AliasSetId.clear();
+ Exprs.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides);
+ void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+ unsigned ASId, const ValueToValueMap &Strides);
/// \brief No run-time memory checking is necessary.
bool empty() const { return Pointers.empty(); }
+ /// A grouping of pointers. A single memcheck is required between
+ /// two groups.
+ struct CheckingPtrGroup {
+ /// \brief Create a new pointer checking group containing a single
+ /// pointer, with index \p Index in RtCheck.
+ CheckingPtrGroup(unsigned Index, RuntimePointerCheck &RtCheck)
+ : RtCheck(RtCheck), High(RtCheck.Ends[Index]),
+ Low(RtCheck.Starts[Index]) {
+ Members.push_back(Index);
+ }
+
+ /// \brief Tries to add the pointer recorded in RtCheck at index
+ /// \p Index to this pointer checking group. We can only add a pointer
+ /// to a checking group if we will still be able to get
+ /// the upper and lower bounds of the check. Returns true in case
+ /// of success, false otherwise.
+ bool addPointer(unsigned Index);
+
+ /// The RuntimePointerCheck that this group belongs to. For each
+ /// pointer that is a member of this group we will retain the index
+ /// at which it appears in RtCheck.
+ RuntimePointerCheck &RtCheck;
+ /// The SCEV expression which represents the upper bound of all the
+ /// pointers in this group.
+ const SCEV *High;
+ /// The SCEV expression which represents the lower bound of all the
+ /// pointers in this group.
+ const SCEV *Low;
+ /// Indices of all the pointers that constitute this grouping.
+ SmallVector<unsigned, 2> Members;
+ };
+
+ /// \brief Groups pointers such that a single memcheck is required
+ /// between two different groups. This will clear the CheckingGroups vector
+ /// and re-compute it. We will only merge pointers into groups if
+ /// \p UseDependencies is true; otherwise we will create a separate
+ /// group for each pointer.
+ void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
+ bool UseDependencies);
+
/// \brief Decide whether we need to issue a run-time check for pointer at
/// index \p I and \p J to prove their independence.
///
bool needsChecking(unsigned I, unsigned J,
const SmallVectorImpl<int> *PtrPartition) const;
+ /// \brief Decide if we need to add a check between two groups of pointers,
+ /// according to needsChecking.
+ bool needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N,
+ const SmallVectorImpl<int> *PtrPartition) const;
+
/// \brief Return true if any pointer requires run-time checking according
/// to needsChecking.
bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
SmallVector<unsigned, 2> DependencySetId;
/// Holds the id of the disjoint alias set to which this pointer belongs.
SmallVector<unsigned, 2> AliasSetId;
+ /// Holds at position i the SCEV of the i-th pointer access.
+ SmallVector<const SCEV *, 2> Exprs;
+ /// Holds a partitioning of pointers into "check groups".
+ SmallVector<CheckingPtrGroup, 2> CheckingGroups;
+ /// Holds a pointer to the ScalarEvolution analysis.
+ ScalarEvolution *SE;
};
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+/// \brief The maximum number of comparisons allowed when merging runtime
+/// memory checks.
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+ "memory-check-merge-threshold", cl::Hidden,
+ cl::desc("Maximum number of comparisons done when trying to merge "
+ "runtime memory checks. (default = 100)"),
+ cl::init(100));
+
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
}
void LoopAccessInfo::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides) {
+ Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
IsWritePtr.push_back(WritePtr);
DependencySetId.push_back(DepSetId);
AliasSetId.push_back(ASId);
+ Exprs.push_back(Sc);
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+ const CheckingPtrGroup &M, const CheckingPtrGroup &N,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+ for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+ if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ return true;
+ return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr if the difference is not a compile-time constant and we
+/// therefore cannot tell which is smaller.
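+/// For example, if \p J - \p I evaluates to the constant -2, then \p J is
+/// the minimum; if it evaluates to 2, then \p I is.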
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+ ScalarEvolution *SE) {
+ const SCEV *Diff = SE->getMinusSCEV(J, I);
+ const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
+
+ if (!C)
+ return nullptr;
+ if (C->getValue()->isNegative())
+ return J;
+ return I;
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
+ unsigned Index) {
+ // Compare the starts and ends with the known minimum and maximum
+ // of this set. We need to know how we compare against the min/max
+ // of the set in order to be able to emit memchecks.
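+ // For example, if Low is %a and the new pointer's start is (2 + %a),
+ // their difference is the constant 2, so %a remains the minimum. If
+ // the difference is not a compile-time constant (e.g. %a vs. %b), the
+ // pointer cannot be added to this group.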
+ const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE);
+ if (!Min0)
+ return false;
+
+ const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE);
+ if (!Min1)
+ return false;
+
+ // Update the low bound expression if we've found a new min value.
+ if (Min0 == RtCheck.Starts[Index])
+ Low = RtCheck.Starts[Index];
+
+ // Update the high bound expression if we've found a new max value.
+ if (Min1 != RtCheck.Ends[Index])
+ High = RtCheck.Ends[Index];
+
+ Members.push_back(Index);
+ return true;
+}
+
+void LoopAccessInfo::RuntimePointerCheck::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ // We build the groups from dependency candidates equivalence classes
+ // because:
+ // - We know that pointers in the same equivalence class share
+ // the same underlying object and therefore there is a chance
+ // that we can compare pointers
+ // - We wouldn't be able to merge two pointers for which we need
+ // to emit a memcheck. The classes in DepCands are already
+ // conveniently built such that no two pointers in the same
+ // class need checking against each other.
+
+ // We use the following (greedy) algorithm to construct the groups.
+ // For every pointer in the equivalence class:
+ // For each existing group:
+ // - if the difference between this pointer and the min/max bounds
+ // of the group is a constant, then make the pointer part of the
+ // group and update the min/max bounds of that group as required.
+ // If the pointer could not be added to any existing group, create a
+ // new group containing only this pointer.
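+ // For example, the accesses {%a,+,2} and {(2 + %a),+,2} differ by the
+ // constant 2 and can therefore share a group (see the testg case in
+ // the tests), while an access {%b,+,2} with an unrelated base has to
+ // start a group of its own.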
+
+ CheckingGroups.clear();
+
+ // If we don't have the dependency partitions, construct a new
+ // checking pointer group for each pointer.
+ if (!UseDependencies) {
+ for (unsigned I = 0; I < Pointers.size(); ++I)
+ CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+ return;
+ }
+
+ unsigned TotalComparisons = 0;
+
+ DenseMap<Value *, unsigned> PositionMap;
+ for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer)
+ PositionMap[Pointers[Pointer]] = Pointer;
+
+ // Go through all equivalence classes, get the "pointer check groups"
+ // and add them to the overall solution.
+ for (auto DI = DepCands.begin(), DE = DepCands.end(); DI != DE; ++DI) {
+ if (!DI->isLeader())
+ continue;
+
+ SmallVector<CheckingPtrGroup, 2> Groups;
+
+ for (auto MI = DepCands.member_begin(DI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+ unsigned Pointer = PositionMap[MI->getPointer()];
+ bool Merged = false;
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (CheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer)) {
+ Merged = true;
+ break;
+ }
+ }
+
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(CheckingPtrGroup(Pointer, *this));
+ }
+
+ // We've computed the grouped checks for this partition.
+ // Save the results and continue with the next one.
+ std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ }
}
bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
void LoopAccessInfo::RuntimePointerCheck::print(
raw_ostream &OS, unsigned Depth,
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
- if (NumPointers == 0)
- return;
OS.indent(Depth) << "Run-time memory checks:\n";
+
unsigned N = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition)) {
- OS.indent(Depth) << N++ << ":\n";
- OS.indent(Depth + 2) << *Pointers[I];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[I] << ")";
- OS << "\n";
- OS.indent(Depth + 2) << *Pointers[J];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[J] << ")";
- OS << "\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I)
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
+ OS.indent(Depth) << "Check " << N++ << ":\n";
+ OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
+ OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]];
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")";
+ OS << "\n";
+ }
+
+ OS.indent(Depth + 2) << "Against group " << J << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
+ OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]];
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")";
+ OS << "\n";
+ }
}
+
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ OS.indent(Depth + 2) << "Group " << I << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
+ << " High: " << *CheckingGroups[I].High << ")\n";
+ for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]]
+ << "\n";
+ }
+ }
}
unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+ unsigned NumGroups = CheckingGroups.size();
unsigned CheckCount = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
+ for (unsigned I = 0; I < NumGroups; ++I)
+ for (unsigned J = I + 1; J < NumGroups; ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
CheckCount++;
return CheckCount;
}
bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
const SmallVectorImpl<int> *PtrPartition) const {
- return getNumberOfChecks(PtrPartition) != 0;
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
}
namespace {
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
}
}
+ if (NeedRTCheck && CanDoRT)
+ RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+
return CanDoRT;
}
if (!PtrRtCheck.Need)
return std::make_pair(nullptr, nullptr);
- unsigned NumPointers = PtrRtCheck.Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
+ SmallVector<TrackingVH<Value>, 2> Starts;
+ SmallVector<TrackingVH<Value>, 2> Ends;
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, DL, "induction");
Instruction *FirstInst = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck.Pointers[i];
+ for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
+ const RuntimePointerCheck::CheckingPtrGroup &CG =
+ PtrRtCheck.CheckingGroups[i];
+ Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
- Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Value *Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
Starts.push_back(Start);
Ends.push_back(End);
}
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
+ for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
+ const RuntimePointerCheck::CheckingPtrGroup &CGI =
+ PtrRtCheck.CheckingGroups[i];
+ const RuntimePointerCheck::CheckingPtrGroup &CGJ =
+ PtrRtCheck.CheckingGroups[j];
+
+ if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
- TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI),
+ AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
-; 3 reads and 3 writes should need 12 memchecks
-
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
+; 3 reads and 3 writes should need 12 memchecks
+; CHECK: function 'testf':
; CHECK: Memory dependences are safe with run-time checks
-; Memory dependecies have labels starting from 0, so in
+
+; Memory dependencies have labels starting from 0, so in
; order to verify that we have n checks, we look for
; (n-1): and not n:.
; CHECK: Run-time memory checks:
-; CHECK-NEXT: 0:
-; CHECK: 11:
-; CHECK-NOT: 12:
+; CHECK-NEXT: Check 0:
+; CHECK: Check 11:
+; CHECK-NOT: Check 12:
define void @testf(i16* %a,
i16* %b,
for.end: ; preds = %for.body
ret void
}
+
+; The following tests (testg and testh) check that we can group
+; runtime memory checks for accesses which differ by a constant value.
+; Both tests are based on the following C code:
+;
+; void testh(short *a, short *b, short *c) {
+; for (unsigned long ind = 0; ind < 20; ++ind) {
+; c[2 * ind] = a[ind] * a[ind + 1];
+; c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
+; }
+; }
+;
+; It is sufficient to check the intervals
+; [a, a + 21], [b, b + 20] against [c, c + 40].
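+;
+; Note that the (Low, High) bounds in the checks below are byte offsets;
+; e.g. a High of (40 + %a) corresponds to the start of a[20], the last
+; element of %a accessed in the loop.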
+
+; 3 reads and 2 writes - two of the reads can be merged,
+; and the writes can be merged as well. This gives us a
+; total of 2 memory checks.
+
+; CHECK: function 'testg':
+
+; CHECK: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group 1:
+; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group 0:
+; CHECK-NEXT: (Low: %a High: (40 + %a))
+; CHECK-NEXT: Member: {(2 + %a),+,2}
+; CHECK-NEXT: Member: {%a,+,2}
+; CHECK-NEXT: Group 1:
+; CHECK-NEXT: (Low: %b High: (38 + %b))
+; CHECK-NEXT: Member: {%b,+,2}
+; CHECK-NEXT: Group 2:
+; CHECK-NEXT: (Low: %c High: (78 + %c))
+; CHECK-NEXT: Member: {(2 + %c),+,4}
+; CHECK-NEXT: Member: {%c,+,4}
+
+define void @testg(i16* %a,
+ i16* %b,
+ i16* %c) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
+
+ %add = add nuw nsw i64 %ind, 1
+ %store_ind_inc = add nuw nsw i64 %store_ind, 1
+ %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
+
+ %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+ %loadA = load i16, i16* %arrayidxA, align 2
+
+ %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
+ %loadA1 = load i16, i16* %arrayidxA1, align 2
+
+ %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+ %loadB = load i16, i16* %arrayidxB, align 2
+
+ %mul = mul i16 %loadA, %loadA1
+ %mul1 = mul i16 %mul, %loadB
+
+ %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+ store i16 %mul1, i16* %arrayidxC, align 2
+
+ %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+ store i16 %mul, i16* %arrayidxC1, align 2
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; 3 reads and 2 writes - the writes can be merged into a single
+; group, but the GEPs used for the reads are not marked as inbounds.
+; We can still merge the reads because the accesses have unit stride,
+; so the GEPs cannot overflow.
+
+; CHECK: function 'testh':
+; CHECK: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
+; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind
+; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group 1:
+; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
+; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group 0:
+; CHECK-NEXT: (Low: %a High: (40 + %a))
+; CHECK-NEXT: Member: {(2 + %a),+,2}
+; CHECK-NEXT: Member: {%a,+,2}
+; CHECK-NEXT: Group 1:
+; CHECK-NEXT: (Low: %b High: (38 + %b))
+; CHECK-NEXT: Member: {%b,+,2}
+; CHECK-NEXT: Group 2:
+; CHECK-NEXT: (Low: %c High: (78 + %c))
+; CHECK-NEXT: Member: {(2 + %c),+,4}
+; CHECK-NEXT: Member: {%c,+,4}
+
+define void @testh(i16* %a,
+ i16* %b,
+ i16* %c) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
+
+ %add = add nuw nsw i64 %ind, 1
+ %store_ind_inc = add nuw nsw i64 %store_ind, 1
+ %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
+
+ %arrayidxA = getelementptr i16, i16* %a, i64 %ind
+ %loadA = load i16, i16* %arrayidxA, align 2
+
+ %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
+ %loadA1 = load i16, i16* %arrayidxA1, align 2
+
+ %arrayidxB = getelementptr i16, i16* %b, i64 %ind
+ %loadB = load i16, i16* %arrayidxB, align 2
+
+ %mul = mul i16 %loadA, %loadA1
+ %mul1 = mul i16 %mul, %loadB
+
+ %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+ store i16 %mul1, i16* %arrayidxC, align 2
+
+ %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+ store i16 %mul, i16* %arrayidxC1, align 2
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: 0:
+; CHECK-NEXT: Comparing group
; CHECK-NEXT: %arrayidxA2 = getelementptr inbounds i16, i16* %a, i64 %idx
+; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %indvar
@B = common global i16* null, align 8
; CHECK-NEXT: store i16 %mul1, i16* %arrayidxA_plus_2, align 2
; CHECK: Run-time memory checks:
; CHECK-NEXT: 0:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %storemerge3
; CHECK-NEXT: 1:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
; CHECK-NEXT: %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3
@B = common global i16* null, align 8
%e = load i32*, i32** @E, align 8
br label %for.body
-; We have two compares for each array overlap check which is a total of 10
-; compares.
+; We have two compares for each array overlap check.
+; Since the checks to A and A + 4 get merged, this will give us a
+; total of 8 compares.
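+; (Two of the original 5 checks collapse into one, leaving 4 checks.)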
;
; CHECK: for.body.lver.memcheck:
; CHECK: = icmp
; CHECK: = icmp
; CHECK: = icmp
-; CHECK: = icmp
-; CHECK: = icmp
-
; CHECK-NOT: = icmp
; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph.ldist1