From: Nadav Rotem Date: Sat, 27 Oct 2012 04:11:32 +0000 (+0000) Subject: 1. Fix a bug in getTypeConversion. When a *simple* type is split, we need to return... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f065a8467785015336432e3e6e584798d8b48d8e;p=oota-llvm.git 1. Fix a bug in getTypeConversion. When a *simple* type is split, we need to return the type of the split result. 2. Change the maximum vectorization width from 4 to 8. 3. A test for both. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166864 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 9d0aeaa3560..13f80fda3d6 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1980,6 +1980,9 @@ public: ValueTypeActions.getTypeAction(NVT.getSimpleVT()) != TypePromoteInteger) && "Promote may not follow Expand or Promote"); + if (LA == TypeSplitVector) + NVT = EVT::getVectorVT(Context, VT.getVectorElementType(), + VT.getVectorNumElements() / 2); return LegalizeKind(LA, NVT); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1773812da24..be197db9563 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -317,7 +317,7 @@ public: /// Returns the most profitable vectorization factor for the loop that is /// smaller or equal to the VF argument. This method checks every power /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = 4); + unsigned findBestVectorizationFactor(unsigned VF = 8); private: /// Returns the expected execution cost. The unit of the cost does diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll new file mode 100644 index 00000000000..e7a63c9316a --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; Select VF = 8; +;CHECK: @example1 +;CHECK: load <8 x i32> +;CHECK: add <8 x i32> +;CHECK: store <8 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +;