From 99b03e3401c303f4115258b812e1d96e20f04945 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 4 May 2014 19:12:38 +0000 Subject: [PATCH] LoopUnroll: If we're doing partial unrolling, use the PartialThreshold to limit unrolling. Otherwise we use the same threshold as for complete unrolling, which is way too high. This made us unroll any loop smaller than 150 instructions by 8 times, but only if someone specified -march=core2 or better, which happens to be the default on darwin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 9 +++-- test/Transforms/LoopUnroll/X86/partial.ll | 47 +++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 3290c6ff45b..fc28fd2bdce 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -238,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > Threshold) { - DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << Threshold << "\n"); + if (TripCount != 1 && + (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { + if (Size > Threshold) + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << Threshold << "\n"); + bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll index 15867cbea0a..75b9c3fb89a 100644 --- a/test/Transforms/LoopUnroll/X86/partial.ll +++ b/test/Transforms/LoopUnroll/X86/partial.ll @@ -76,5 +76,52 @@ for.end: ; preds = %vector.body ret void } +define zeroext i16 @test1(i16* nocapture readonly %arr, i32 %n) #0 { +entry: + %cmp25 = icmp eq i32 %n, 0 + br i1 %cmp25, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16* %arr, i64 %indvars.iv + %0 = load i16* %arrayidx, align 2 + %add = add i16 %0, %reduction.026 + %sext = mul i64 %indvars.iv, 12884901888 + %idxprom3 = ashr exact i64 %sext, 32 + %arrayidx4 = getelementptr inbounds i16* %arr, i64 %idxprom3 + %1 = load i16* %arrayidx4, align 2 + %add7 = add i16 %add, %1 + %sext28 = mul i64 %indvars.iv, 21474836480 + %idxprom10 = ashr exact i64 %sext28, 32 + %arrayidx11 = getelementptr inbounds i16* %arr, i64 %idxprom10 + %2 = load i16* %arrayidx11, align 2 + %add14 = add i16 %add7, %2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %reduction.0.lcssa = phi i16 [ 0, %entry ], [ %add14, %for.body ] + ret i16 %reduction.0.lcssa + +; This loop is too large to be partially unrolled (size=16) + +; CHECK-LABEL: @test1 +; CHECK: br +; CHECK: br +; CHECK: br +; CHECK: br +; CHECK-NOT: br + +; CHECK-NOUNRL-LABEL: @test1 +; CHECK-NOUNRL: br +; CHECK-NOUNRL: br +; CHECK-NOUNRL: br +; CHECK-NOUNRL: br +; CHECK-NOUNRL-NOT: br +} + attributes #0 = { nounwind uwtable } -- 2.34.1