From 8510b6c434e3a4a82dc6e8b879e181726a880c14 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 10 Sep 2015 05:27:38 +0000 Subject: [PATCH] [ScalarEvolution] Fix PR24757. Summary: PR24757 was caused by some incorect math in `ScalarEvolution::HowFarToZero` -- the smallest unsigned solution for X in 2^N * A = 2^N * X is not necessarily A. Reviewers: atrick, majnemer, meheff Subscribers: llvm-commits, sanjoy Differential Revision: http://reviews.llvm.org/D12721 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247242 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 44 ++++++++++++++++++++++-- test/Analysis/ScalarEvolution/pr24757.ll | 35 +++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 test/Analysis/ScalarEvolution/pr24757.ll diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ef695234b66..75b427fff49 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6306,8 +6306,48 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // also returns true if StepV is maximally negative (eg, INT_MIN), but that // case is not handled as this code is guarded by !CountDown. if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, Step); + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { + // Here we've constrained the equation to be of the form + // + // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) + // + // where we're operating on a W bit wide integer domain and k is + // non-negative. The smallest unsigned solution for X is the trip count. + // + // (0) is equivalent to: + // + // 2^(N + k) * Distance' - 2^N * X = L * 2^W + // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N + // <=> 2^k * Distance' - X = L * 2^(W - N) + // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) + // + // The smallest X satisfying (1) is unsigned remainder of dividing the LHS + // by 2^(W - N). + // + // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) + // + // E.g. say we're solving + // + // 2 * Val = 2 * X (in i8) ... (3) + // + // then from (2), we get X = Val URem i8 128 (k = 0 in this case). + // + // Note: It is tempting to solve (3) by setting X = Val, but Val is not + // necessarily the smallest unsigned value of X that satisfies (3). + // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) + // is i8 1, not i8 -127 + + const auto *ModuloResult = getUDivExactExpr(Distance, Step); + + // Since SCEV does not have a URem node, we construct one using a truncate + // and a zero extend. + + unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); + auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); + auto *WideTy = Distance->getType(); + + return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + } } // If the condition controls loop exit (the loop exits only if the expression diff --git a/test/Analysis/ScalarEvolution/pr24757.ll b/test/Analysis/ScalarEvolution/pr24757.ll new file mode 100644 index 00000000000..815adcde0e9 --- /dev/null +++ b/test/Analysis/ScalarEvolution/pr24757.ll @@ -0,0 +1,35 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; CHECK: Loop %bb1: backedge-taken count is (zext i7 (trunc i8 %a.promoted to i7) to i8) + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +@a = global i8 -127, align 1 +@b = common global i32 0, align 4 + +declare void @use(i32) + +define i32 @main() { +bb: + %a.promoted = load i8, i8* @a + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i8 [ %tmp2, %bb1 ], [ %a.promoted, %bb ] + %tmp2 = add i8 %tmp, -1 + %tmp3 = sext i8 %tmp to i32 + %tmp4 = xor i32 %tmp3, -1 + %tmp5 = sext i8 %tmp2 to i32 + %tmpf = sub nsw i32 %tmp4, %tmp5 + %tmp6 = trunc i32 %tmpf to i8 + %tmp7 = icmp eq i8 %tmp6, 0 + br i1 %tmp7, label %bb8, label %bb1 + +bb8: ; preds = %bb1 + store i8 %tmp2, i8* @a + store i32 %tmp4, i32* @b + %tmp9 = sext i8 %tmp2 to i32 + call void @use(i32 %tmp9) + ret i32 0 +} -- 2.34.1