if (!BasePointer)
break;
AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
-
- // Do not try to delinearize memory accesses that are not AddRecs.
- if (!AR)
- break;
-
O << "\n";
O << "Inst:" << *Inst << "\n";
O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
- O << "AddRec: " << *AR << "\n";
+ O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
+ SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
if (Subscripts.size() == 0 || Sizes.size() == 0 ||
Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
}
bool isDone() const { return false; }
};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+ SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ if (isa<SCEVUnknown>(Op)) {
+ Operands.push_back(Op);
+ } else {
+ bool ContainsAddRec;
+ SCEVHasAddRec ContiansAddRec(ContainsAddRec);
+ visitAll(Op, ContiansAddRec);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+ if (Operands.size() == 0)
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
}
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Terms) {
SmallVector<const SCEV *, 4> Strides;
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+ visitAll(Expr, MulCollector);
}
static bool findArrayDimensionsRec(ScalarEvolution &SE,
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- // Divide all terms by the element size.
+ // Try to divide all terms by the element size. If term is not divisible by
+ // element size, proceed with the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
- Term = Q;
+ if (!Q->isZero())
+ Term = Q;
}
SmallVector<const SCEV *, 4> NewTerms;
--- /dev/null
+; RUN: opt -delinearize -analyze < %s | FileCheck %s
+;
+; void foo(float *A, long *p) {
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < 100; j++)
+; A[i * (*p) + j] += i + j;
+; }
+;
+; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%bb2>][{0,+,1}<nuw><nsw><%bb4>]
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(float* %A, i64* %p) {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb16, %bb
+ %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ]
+ %exitcond1 = icmp ne i64 %i.0, 100
+ br i1 %exitcond1, label %bb3, label %bb18
+
+bb3: ; preds = %bb2
+ br label %bb4
+
+bb4: ; preds = %bb13, %bb3
+ %j.0 = phi i64 [ 0, %bb3 ], [ %tmp14, %bb13 ]
+ %exitcond = icmp ne i64 %j.0, 100
+ br i1 %exitcond, label %bb5, label %bb15
+
+bb5: ; preds = %bb4
+ %tmp = add nuw nsw i64 %i.0, %j.0
+ %tmp6 = sitofp i64 %tmp to float
+ %pval = load i64, i64* %p, align 8
+ %tmp8 = mul nsw i64 %i.0, %pval
+ %tmp9 = add nsw i64 %tmp8, %j.0
+ %tmp10 = getelementptr inbounds float, float* %A, i64 %tmp9
+ %tmp11 = load float, float* %tmp10, align 4
+ %tmp12 = fadd float %tmp11, %tmp6
+ store float %tmp12, float* %tmp10, align 4
+ br label %bb13
+
+bb13: ; preds = %bb5
+ %tmp14 = add nuw nsw i64 %j.0, 1
+ br label %bb4
+
+bb15: ; preds = %bb4
+ br label %bb16
+
+bb16: ; preds = %bb15
+ %tmp17 = add nuw nsw i64 %i.0, 1
+ br label %bb2
+
+bb18: ; preds = %bb2
+ ret void
+}