[vectorizer] Teach the loop vectorizer's unroller to only unroll by powers of two.

author Chandler Carruth <chandlerc@gmail.com>

Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h

index 13c2f72e1d96d05941c871c155b624e4eb118aed..30a1ad458443e84426bfccbb36e1f83ed015c664 100644 (file)
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -552,6 +552,13 @@ inline uint64_t NextPowerOf2(uint64_t A) {
    return A + 1;
  }
  
+/// Returns the largest power of two that is less than or equal to \p A, or 0
+/// if \p A is 0. Essentially, it is a floor operation over the powers of two.
+inline uint64_t PowerOf2Floor(uint64_t A) {
+  if (!A) return 0;
+  return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
+}
+
  /// Returns the next integer (mod 2**64) that is greater than or equal to
  /// \p Value and is a multiple of \p Align. \p Align must be non-zero.
  ///
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 23dc8f39eea1f197586fd565e93dc6cdb86a58e8..5c9933a2c726eb232097f59bdf027bd992cdf776 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5004,8 +5004,11 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
    // registers. These registers are used by all of the unrolled instances.
    // Next, divide the remaining registers by the number of registers that is
    // required by the loop, in order to estimate how many parallel instances
-  // fit without causing spills.
-  unsigned UF = (TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
+  // fit without causing spills. All of this is rounded down if necessary to be
+  // a power of two. We want power of two unroll factors to simplify any
+  // addressing operations or alignment considerations.
+  unsigned UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+                              R.MaxLocalUsers);
  
    // Clamp the unroll factor ranges to reasonable factors.
    unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
@@ -5045,7 +5048,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
    DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
    if (LoopCost < SmallLoopCost) {
      DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
-    unsigned NewUF = SmallLoopCost / (LoopCost + 1);
+    unsigned NewUF = PowerOf2Floor(SmallLoopCost / LoopCost);
      return std::min(NewUF, UF);
    }
  
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll

index 33f128da905df7880d4848cba8eb59e191cdb825..be5bbb68e65bfc4f8989d43cc64c372d863db380 100644 (file)
--- a/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -small-loop-cost=20 -dce -instcombine -S | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
@@ -12,10 +12,20 @@ target triple = "x86_64-apple-macosx10.8.0"
  ;CHECK-LABEL: @inc(
  ;CHECK: load i32*
  ;CHECK: load i32*
+;CHECK: load i32*
+;CHECK: load i32*
+;CHECK-NOT: load i32*
+;CHECK: add nsw i32
  ;CHECK: add nsw i32
  ;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK-NOT: add nsw i32
+;CHECK: store i32
+;CHECK: store i32
  ;CHECK: store i32
  ;CHECK: store i32
+;CHECK-NOT: store i32
+;CHECK: add i64 %{{.*}}, 4
  ;CHECK: ret void
  define void @inc(i32 %n) nounwind uwtable noinline ssp {
    %1 = icmp sgt i32 %n, 0
author	Chandler Carruth <chandlerc@gmail.com>
	Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Mon, 27 Jan 2014 11:12:24 +0000 (11:12 +0000)
include/llvm/Support/MathExtras.h		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/unroll_novec.ll		patch \| blob \| history