#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
/// in LoopNest.
bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const;
- /// Makes sure both subscripts (i.e. Pair->Src and Pair->Dst) share the same
- /// integer type by sign-extending one of them when necessary.
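+ /// Makes sure the Src and Dst of every subscript pair in Pairs share the
+ /// same integer type, namely the widest integer type found among them, by
+ /// sign-extending the narrower ones as necessary. Pairs whose Src and Dst
+ /// are not integer-typed are left unchanged.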
/// Sign-extending a subscript is safe because getelementptr assumes the
- /// array subscripts are signed.
- void unifySubscriptType(Subscript *Pair);
+ /// array subscripts are signed.
+ void unifySubscriptType(ArrayRef<Subscript *> Pairs);
/// removeMatchingExtensions - Examines a subscript pair.
/// If the source and destination are identically sign (or zero)
}
}
-void DependenceAnalysis::unifySubscriptType(Subscript *Pair) {
- const SCEV *Src = Pair->Src;
- const SCEV *Dst = Pair->Dst;
- IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
- IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
- if (SrcTy == nullptr || DstTy == nullptr) {
- assert(SrcTy == DstTy && "This function only unify integer types and "
- "expect Src and Dst share the same type "
- "otherwise.");
- return;
+// unifySubscriptType - Brings every subscript pair in Pairs to a common
+// integer type.
+// The widest integer type found among all Src and Dst expressions is
+// selected, and every narrower Src or Dst is replaced in place by its
+// sign-extension to that type.
+// Pairs whose Src or Dst is not integer-typed are skipped; for such a pair
+// Src and Dst are expected to have the same type.
+// At least one pair in Pairs must be integer-typed.
+void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
+  unsigned widestWidthSeen = 0;
+  // Stays null until the first integer-typed pair is encountered.
+  Type *widestType = nullptr;
+
+  // First pass: find the widest integer type to which all of the pairs
+  // need to be extended.
+  for (Subscript *Pair : Pairs) {
+    IntegerType *SrcTy = dyn_cast<IntegerType>(Pair->Src->getType());
+    IntegerType *DstTy = dyn_cast<IntegerType>(Pair->Dst->getType());
+    if (SrcTy == nullptr || DstTy == nullptr) {
+      assert(SrcTy == DstTy && "This function only unify integer types and "
+                               "expect Src and Dst share the same type "
+                               "otherwise.");
+      continue;
+    }
+    if (SrcTy->getBitWidth() > widestWidthSeen) {
+      widestWidthSeen = SrcTy->getBitWidth();
+      widestType = SrcTy;
+    }
+    if (DstTy->getBitWidth() > widestWidthSeen) {
+      widestWidthSeen = DstTy->getBitWidth();
+      widestType = DstTy;
+    }
}
- if (SrcTy->getBitWidth() > DstTy->getBitWidth()) {
- // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst).
- Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy);
- } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) {
- // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src).
- Pair->Src = SE->getSignExtendExpr(Src, DstTy);
+
+  assert(widestWidthSeen > 0 &&
+         "Expected at least one integer-typed subscript pair");
+
+  // Second pass: sign-extend every narrower subscript to the widest type.
+  for (Subscript *Pair : Pairs) {
+    IntegerType *SrcTy = dyn_cast<IntegerType>(Pair->Src->getType());
+    IntegerType *DstTy = dyn_cast<IntegerType>(Pair->Dst->getType());
+    // Non-integer pairs were already validated by the first pass and are
+    // left untouched.
+    if (SrcTy == nullptr || DstTy == nullptr)
+      continue;
+    if (SrcTy->getBitWidth() < widestWidthSeen)
+      Pair->Src = SE->getSignExtendExpr(Pair->Src, widestType);
+    if (DstTy->getBitWidth() < widestWidthSeen)
+      Pair->Dst = SE->getSignExtendExpr(Pair->Dst, widestType);
}
}
// return the coefficient (the step)
// corresponding to the specified loop.
// If there isn't one, return 0.
-// For example, given a*i + b*j + c*k, zeroing the coefficient
+// For example, given a*i + b*j + c*k, finding the coefficient
// corresponding to the j loop would yield b.
const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
+ SmallVector<Subscript *, 4> PairsInGroup;
for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
Mivs.set(SJ);
+ PairsInGroup.push_back(&Pair[SJ]);
}
+ unifySubscriptType(PairsInGroup);
DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
for.end:
ret void
}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; unsigned i, j;
+; for (i = 1; i < SIZE; i++) {
+; for (j = i; j < SIZE; j++) {
+; a[i][j] = a[i+1][j-1] + 2;
+; }
+; }
+; Extends the previous example to coupled MIV subscripts.
+
+
+@a = global [10004 x [10004 x i32]] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @coupled_miv_type_mismatch(i32 %n) #0 {
+; Doubly nested loop over the global array @a: each inner iteration loads
+; one element, adds 2 and stores the result to another element of @a.
+; The argument %n is not referenced anywhere in the body.
+; CHECK-LABEL: 'Dependence Analysis' for function 'coupled_miv_type_mismatch'
+; DELIN-LABEL: 'Dependence Analysis' for function 'coupled_miv_type_mismatch'
+entry:
+ br label %for.cond
+
+; CHECK: da analyze - input [0 *]!
+; CHECK: da analyze - anti [1 *]!
+; CHECK: da analyze - none!
+; DELIN: da analyze - input [0 *]!
+; DELIN: da analyze - anti [1 *]!
+; DELIN: da analyze - none!
+; Outer loop header: %indvars.iv11 starts at 1, is incremented in
+; for.inc11, and the loop is left once it equals 10000.
+for.cond: ; preds = %for.inc11, %entry
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc11 ], [ 1, %entry ]
+ %exitcond14 = icmp ne i64 %indvars.iv11, 10000
+ br i1 %exitcond14, label %for.cond1.preheader, label %for.end13
+
+; %0 is the outer induction variable truncated to i32; it seeds the i32
+; inner counter %j.0.
+for.cond1.preheader: ; preds = %for.cond
+ %0 = trunc i64 %indvars.iv11 to i32
+ br label %for.cond1
+
+; Inner loop header: the inner counter is carried twice, as i64
+; (%indvars.iv8) and as i32 (%j.0). Both start from the current outer
+; value and are incremented by 1 in for.body3.
+for.cond1: ; preds = %for.cond1.preheader, %for.body3
+ %indvars.iv8 = phi i64 [ %indvars.iv11, %for.cond1.preheader ], [ %indvars.iv.next9, %for.body3 ]
+ %j.0 = phi i32 [ %inc, %for.body3 ], [ %0, %for.cond1.preheader ]
+ %lftr.wideiv = trunc i64 %indvars.iv8 to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 10000
+ br i1 %exitcond, label %for.body3, label %for.inc11
+
+; Loop body: the load's last index is computed in i32 (%j.0 - 1) and then
+; zero-extended to i64, whereas the store's last index is the i64
+; induction variable %indvars.iv8 itself. The load's first index is
+; %indvars.iv11 + 1; the store's first index is %indvars.iv11.
+for.body3: ; preds = %for.cond1
+ %sub = add nsw i32 %j.0, -1
+ %idxprom = zext i32 %sub to i64
+ %1 = add nuw nsw i64 %indvars.iv11, 1
+ %arrayidx5 = getelementptr inbounds [10004 x [10004 x i32]], [10004 x [10004 x i32]]* @a, i64 0, i64 %1, i64 %idxprom
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add6 = add nsw i32 %2, 2
+ %arrayidx10 = getelementptr inbounds [10004 x [10004 x i32]], [10004 x [10004 x i32]]* @a, i64 0, i64 %indvars.iv11, i64 %indvars.iv8
+ store i32 %add6, i32* %arrayidx10, align 4
+ %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+ %inc = add nuw nsw i32 %j.0, 1
+ br label %for.cond1
+
+; Outer loop latch: advances %indvars.iv11 by 1.
+for.inc11: ; preds = %for.cond1
+ %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (https://vaivaswatha@bitbucket.org/compilertree/amd_clang.git 93a05fb75ee3411d24e8b2b184fc766a5318403e) (https://vaivaswatha@bitbucket.org/compilertree/amd_llvm.git 166d93d26efc912b517739f64d054a435e8e95cd)"}