From: Michael Zolotukhin Date: Tue, 17 Mar 2015 19:50:55 +0000 (+0000) Subject: TLI: Add addVectorizableFunctionsFromVecLib. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5850602050c071806d2e2b51600f87107fc110c3;p=oota-llvm.git TLI: Add addVectorizableFunctionsFromVecLib. Also, add several entries to vectorizable functions table, and corresponding tests. The table isn't complete, it'll be populated later. Review: http://reviews.llvm.org/D8131 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232531 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index c2ece8cf3ba..e0a1ee37827 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -71,6 +71,18 @@ class TargetLibraryInfoImpl { std::vector ScalarDescs; public: + /// \brief List of known vector-functions libraries. + /// + /// The vector-functions library defines, which functions are vectorizable + /// and with which factor. The library can be specified by either frontend, + /// or a commandline option, and then used by + /// addVectorizableFunctionsFromVecLib for filling up the tables of + /// vectorizable functions. + enum VectorLibrary { + NoLibrary, // Don't use any vector library. + Accelerate // Use Accelerate framework. + }; + TargetLibraryInfoImpl(); explicit TargetLibraryInfoImpl(const Triple &T); @@ -117,6 +129,10 @@ public: /// queryable via getVectorizedFunction and getScalarizedFunction. void addVectorizableFunctions(ArrayRef Fns); + /// Calls addVectorizableFunctions with a known preset of functions for the + /// given vector library. + void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib); + /// isFunctionVectorizable - Return true if the function F has a /// vector equivalent with vectorization factor VF. bool isFunctionVectorizable(StringRef F, unsigned VF) const { diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index c8f193f0299..7e574d55c83 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -13,8 +13,18 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt ClVectorLibrary( + "vector-library", cl::Hidden, cl::desc("Vector functions library"), + cl::init(TargetLibraryInfoImpl::NoLibrary), + cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none", + "No vector functions library"), + clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", + "Accelerate framework"), + clEnumValEnd)); + const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" @@ -345,6 +355,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc::statvfs64); TLI.setUnavailable(LibFunc::tmpfile64); } + + TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); } TargetLibraryInfoImpl::TargetLibraryInfoImpl() { @@ -452,6 +464,28 @@ void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef Fns) { std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName); } +void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( + enum VectorLibrary VecLib) { + switch (VecLib) { + case Accelerate: { + const VecDesc VecFuncs[] = { + {"expf", "vexpf", 4}, + {"llvm.exp.f32", "vexpf", 4}, + {"logf", "vlogf", 4}, + {"llvm.log.f32", "vlogf", 4}, + {"sqrtf", "vsqrtf", 4}, + {"llvm.sqrt.f32", "vsqrtf", 4}, + {"fabsf", "vfabsf", 4}, + {"llvm.fabs.f32", "vfabsf", 4}, + }; + addVectorizableFunctions(VecFuncs); + break; + } + case NoLibrary: + break; + } +} + bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) diff --git a/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/test/Transforms/LoopVectorize/X86/veclib-calls.ll new file mode 100644 index 00000000000..62e0a44b508 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/veclib-calls.ll @@ -0,0 +1,182 @@ +; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @sqrt_f32( +;CHECK: vsqrtf{{.*}}<4 x float> +;CHECK: ret void +declare float @sqrtf(float) nounwind readnone +define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @sqrtf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @exp_f32( +;CHECK: vexpf{{.*}}<4 x float> +;CHECK: ret void +declare float @expf(float) nounwind readnone +define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @expf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @log_f32( +;CHECK: vlogf{{.*}}<4 x float> +;CHECK: ret void +declare float @logf(float) nounwind readnone +define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @logf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; For abs instruction we'll generate vector intrinsic, as it's cheaper than a lib call. +;CHECK-LABEL: @fabs_f32( +;CHECK: fabs{{.*}}<4 x float> +;CHECK: ret void +declare float @fabsf(float) nounwind readnone +define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @fabsf(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we can vectorize an intrinsic into a vector call. +;CHECK-LABEL: @exp_f32_intrin( +;CHECK: vexpf{{.*}}<4 x float> +;CHECK: ret void +declare float @llvm.exp.f32(float) nounwind readnone +define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @llvm.exp.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we don't vectorize arbitrary functions. +;CHECK-LABEL: @foo_f32( +;CHECK-NOT: foo{{.*}}<4 x float> +;CHECK: ret void +declare float @foo(float) nounwind readnone +define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @foo(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; Test that we don't vectorize calls with nobuiltin attribute. +;CHECK-LABEL: @sqrt_f32_nobuiltin( +;CHECK-NOT: vsqrtf{{.*}}<4 x float> +;CHECK: ret void +define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv + %0 = load float, float* %arrayidx, align 4 + %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin + %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +}