From 505bc5070135483f0700cb04b33ff670efc84816 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Fri, 8 Jan 2016 01:55:13 +0000 Subject: [PATCH] [Vectorization] Actually return from error case in isStridedPtr The early return seems to be missed. This causes a radical and wrong loop optimization on powerpc. It isn't reproducible on x86_64, because "UseInterleaved" is false. Patch by Tim Shen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/LoopAccessAnalysis.cpp | 1 + .../interleave-innermost.ll | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 test/Analysis/LoopAccessAnalysis/interleave-innermost.ll diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index d7896ade354..8bcdcb86201 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -845,6 +845,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, if (Lp != AR->getLoop()) { DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *PtrScev << "\n"); + return 0; } // The address calculation must not wrap. Otherwise, a dependence could be diff --git a/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll b/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll new file mode 100644 index 00000000000..6d8288e8ce3 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll @@ -0,0 +1,29 @@ +; RUN: opt -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s +; CHECK-LABEL: TestFoo +; CHECK-NOT: %wide.vec + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define void @TestFoo(i1 %X, i1 %Y) { +bb: + br label %.loopexit5.outer + +.loopexit5.outer: + br label %.lr.ph12 + +.loopexit: + br i1 %X, label %.loopexit5.outer, label %.lr.ph12 + +.lr.ph12: + %f.110 = phi i32* [ %tmp1, %.loopexit ], [ null, %.loopexit5.outer ] + %tmp1 = getelementptr inbounds i32, i32* %f.110, i64 -2 + br i1 %Y, label %bb4, label %.loopexit + +bb4: + %j.27 = phi i32 [ 0, %.lr.ph12 ], [ %tmp7, %bb4 ] + %tmp5 = load i32, i32* %f.110, align 4 + %tmp7 = add nsw i32 %j.27, 1 + %exitcond = icmp eq i32 %tmp7, 0 + br i1 %exitcond, label %.loopexit, label %bb4 +} -- 2.34.1