From 87cb71d23ec5fcdf9d94727f4f917c8853eed2c6 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 12 Oct 2015 08:02:00 +0000 Subject: [PATCH] SCEV: Allow simple AddRec * Parameter products in delinearization This patch also allows the -delinearize pass to delinearize expressions that do not have an outermost SCEVAddRec expression. The SCEV::delinearize infrastructure allowed this since r240952, but the -delinearize pass was not updated yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/Delinearization.cpp | 10 +-- lib/Analysis/ScalarEvolution.cpp | 86 ++++++++++++++++++- .../parameter_addrec_product.ll | 56 ++++++++++++ 3 files changed, 141 insertions(+), 11 deletions(-) create mode 100644 test/Analysis/Delinearization/parameter_addrec_product.ll diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index 8e65c4c469a..baee8b3b084 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const { if (!BasePointer) break; AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); - const SCEVAddRecExpr *AR = dyn_cast(AccessFn); - - // Do not try to delinearize memory accesses that are not AddRecs. - if (!AR) - break; - O << "\n"; O << "Inst:" << *Inst << "\n"; O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; - O << "AddRec: " << *AR << "\n"; + O << "AccessFunction: " << *AccessFn << "\n"; SmallVector Subscripts, Sizes; - SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst)); + SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst)); if (Subscripts.size() == 0 || Sizes.size() == 0 || Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 5843ed76fa2..fc7e09d88ba 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -8307,9 +8307,84 @@ struct SCEVCollectTerms { } bool isDone() const { return false; } }; + +// Check if a SCEV contains an AddRecExpr. +struct SCEVHasAddRec { + bool &ContainsAddRec; + + SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { + ContainsAddRec = false; + } + + bool follow(const SCEV *S) { + if (isa(S)) { + ContainsAddRec = true; + + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; + +// Find factors that are multiplied with an expression that (possibly as a +// subexpression) contains an AddRecExpr. In the expression: +// +// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) +// +// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" +// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size +// parameters as they form a product with an induction variable. +// +// This collector expects all array size parameters to be in the same MulExpr. +// It might be necessary to later add support for collecting parameters that are +// spread over different nested MulExpr. +struct SCEVCollectAddRecMultiplies { + SmallVectorImpl &Terms; + ScalarEvolution &SE; + + SCEVCollectAddRecMultiplies(SmallVectorImpl &T, ScalarEvolution &SE) + : Terms(T), SE(SE) {} + + bool follow(const SCEV *S) { + if (auto *Mul = dyn_cast(S)) { + bool HasAddRec = false; + SmallVector Operands; + for (auto Op : Mul->operands()) { + if (isa(Op)) { + Operands.push_back(Op); + } else { + bool ContainsAddRec; + SCEVHasAddRec ContiansAddRec(ContainsAddRec); + visitAll(Op, ContiansAddRec); + HasAddRec |= ContainsAddRec; + } + } + if (Operands.size() == 0) + return true; + + if (!HasAddRec) + return false; + + Terms.push_back(SE.getMulExpr(Operands)); + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; } -/// Find parametric terms in this SCEVAddRecExpr. +/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in +/// two places: +/// 1) The strides of AddRec expressions. +/// 2) Unknowns that are multiplied with AddRec expressions. void ScalarEvolution::collectParametricTerms(const SCEV *Expr, SmallVectorImpl &Terms) { SmallVector Strides; @@ -8332,6 +8407,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr, for (const SCEV *T : Terms) dbgs() << *T << "\n"; }); + + SCEVCollectAddRecMultiplies MulCollector(Terms, *this); + visitAll(Expr, MulCollector); } static bool findArrayDimensionsRec(ScalarEvolution &SE, @@ -8492,11 +8570,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, ScalarEvolution &SE = *const_cast(this); - // Divide all terms by the element size. + // Try to divide all terms by the element size. If term is not divisible by + // element size, proceed with the original term. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); - Term = Q; + if (!Q->isZero()) + Term = Q; } SmallVector NewTerms; diff --git a/test/Analysis/Delinearization/parameter_addrec_product.ll b/test/Analysis/Delinearization/parameter_addrec_product.ll new file mode 100644 index 00000000000..561158eae73 --- /dev/null +++ b/test/Analysis/Delinearization/parameter_addrec_product.ll @@ -0,0 +1,56 @@ +; RUN: opt -delinearize -analyze < %s | FileCheck %s +; +; void foo(float *A, long *p) { +; for (long i = 0; i < 100; i++) +; for (long j = 0; j < 100; j++) +; A[i * (*p) + j] += i + j; +; } +; +; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes. +; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>] +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(float* %A, i64* %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb16, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ] + %exitcond1 = icmp ne i64 %i.0, 100 + br i1 %exitcond1, label %bb3, label %bb18 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb13, %bb3 + %j.0 = phi i64 [ 0, %bb3 ], [ %tmp14, %bb13 ] + %exitcond = icmp ne i64 %j.0, 100 + br i1 %exitcond, label %bb5, label %bb15 + +bb5: ; preds = %bb4 + %tmp = add nuw nsw i64 %i.0, %j.0 + %tmp6 = sitofp i64 %tmp to float + %pval = load i64, i64* %p, align 8 + %tmp8 = mul nsw i64 %i.0, %pval + %tmp9 = add nsw i64 %tmp8, %j.0 + %tmp10 = getelementptr inbounds float, float* %A, i64 %tmp9 + %tmp11 = load float, float* %tmp10, align 4 + %tmp12 = fadd float %tmp11, %tmp6 + store float %tmp12, float* %tmp10, align 4 + br label %bb13 + +bb13: ; preds = %bb5 + %tmp14 = add nuw nsw i64 %j.0, 1 + br label %bb4 + +bb15: ; preds = %bb4 + br label %bb16 + +bb16: ; preds = %bb15 + %tmp17 = add nuw nsw i64 %i.0, 1 + br label %bb2 + +bb18: ; preds = %bb2 + ret void +} -- 2.34.1