From 8ffc96a163a9d4387feb06007d36ac1e419a8cd1 Mon Sep 17 00:00:00 2001 From: Karthik Bhat Date: Fri, 30 May 2014 04:31:24 +0000 Subject: [PATCH] Allow vectorization of intrinsics such as powi,cttz and ctlz in Loop and SLP Vectorizer. This patch adds support to vectorize intrinsics such as powi, cttz and ctlz in Vectorizer. These intrinsics are different from other intrinsics as second argument to these function must be same in order to vectorize them and it should be represented as a scalar. Review: http://reviews.llvm.org/D3851#inline-32769 and http://reviews.llvm.org/D3937#inline-32857 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209873 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/VectorUtils.h | 15 + lib/Transforms/Vectorize/LoopVectorize.cpp | 15 + lib/Transforms/Vectorize/SLPVectorizer.cpp | 29 +- test/Transforms/LoopVectorize/intrinsic.ll | 102 +++++++ .../Transforms/SLPVectorizer/X86/intrinsic.ll | 267 ++++++++++++++++++ 5 files changed, 426 insertions(+), 2 deletions(-) diff --git a/include/llvm/Transforms/Utils/VectorUtils.h b/include/llvm/Transforms/Utils/VectorUtils.h index e1d6c562923..44a7149eee9 100644 --- a/include/llvm/Transforms/Utils/VectorUtils.h +++ b/include/llvm/Transforms/Utils/VectorUtils.h @@ -48,12 +48,27 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::pow: case Intrinsic::fma: case Intrinsic::fmuladd: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::powi: return true; default: return false; } } +static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, + unsigned ScalarOpdIdx) { + switch (ID) { + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::powi: + return (ScalarOpdIdx == 1); + default: + return false; + } +} + static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, Intrinsic::ID ValidIntrinsicID) { if (I.getNumArgOperands() != 1 || diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ba2b7eea363..15d4c1c79d2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3123,9 +3123,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { scalarizeInstruction(it); break; default: + bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + if (HasScalarOpd && i == 1) { + Args.push_back(CI->getArgOperand(i)); + continue; + } VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); Args.push_back(Arg[Part]); } @@ -3474,6 +3479,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the + // second argument is the same (i.e. loop invariant) + if (CI && + hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { + if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); + return false; + } + } + // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. if ((!VectorType::isValidElementType(it->getType()) && diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index e13ba956c39..ce0a009e1cb 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -961,9 +961,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } - Function *Int = CI->getCalledFunction(); - + Value *A1I = nullptr; + if (hasVectorInstrinsicScalarOpd(ID, 1)) + A1I = CI->getArgOperand(1); for (unsigned i = 1, e = VL.size(); i != e; ++i) { CallInst *CI2 = dyn_cast(VL[i]); if (!CI2 || CI2->getCalledFunction() != Int || @@ -973,6 +974,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { << "\n"); return; } + // ctlz,cttz and powi are special intrinsics whose second argument + // should be same in order for them to be vectorized. + if (hasVectorInstrinsicScalarOpd(ID, 1)) { + Value *A1J = CI2->getArgOperand(1); + if (A1I != A1J) { + newTreeEntry(VL, false); + DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI + << " argument "<< A1I<<"!=" << A1J + << "\n"); + return; + } + } } newTreeEntry(VL, true); @@ -1652,9 +1665,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Call: { CallInst *CI = cast(VL0); setInsertPointAfterBundle(E->Scalars); + Function *FI; + Intrinsic::ID IID = Intrinsic::not_intrinsic; + if (CI && (FI = CI->getCalledFunction())) { + IID = (Intrinsic::ID) FI->getIntrinsicID(); + } std::vector OpVecs; for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { ValueList OpVL; + // ctlz,cttz and powi are special intrinsics whose second argument is + // a scalar. This argument should not be vectorized. + if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) { + CallInst *CEI = cast(E->Scalars[0]); + OpVecs.push_back(CEI->getArgOperand(j)); + continue; + } for (int i = 0, e = E->Scalars.size(); i < e; ++i) { CallInst *CEI = cast(E->Scalars[i]); OpVL.push_back(CEI->getArgOperand(j)); diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index c3d570c03a7..7dfaf03b0f2 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body ret void } +declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone + +;CHECK-LABEL: @powi_f64( +;CHECK: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @powi_f64_neg( +;CHECK-NOT: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %1 = trunc i64 %indvars.iv to i32 + %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @cttz_f64( +;CHECK: llvm.cttz.v4i64 +;CHECK: ret void +define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @ctlz_f64( +;CHECK: llvm.ctlz.v4i64 +;CHECK: ret void +define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll index 30c50936973..937252f4146 100644 --- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll +++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll @@ -117,3 +117,270 @@ entry: ; CHECK: store <4 x i32> ; CHECK: ret } + +declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone + +define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) { +entry: + %i0 = load i32* %a, align 4 + %i1 = load i32* %b, align 4 + %add1 = add i32 %i0, %i1 + %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone + + %arrayidx2 = getelementptr inbounds i32* %a, i32 1 + %i2 = load i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i32 1 + %i3 = load i32* %arrayidx3, align 4 + %add2 = add i32 %i2, %i3 + %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone + + %arrayidx4 = getelementptr inbounds i32* %a, i32 2 + %i4 = load i32* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32* %b, i32 2 + %i5 = load i32* %arrayidx5, align 4 + %add3 = add i32 %i4, %i5 + %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone + + %arrayidx6 = getelementptr inbounds i32* %a, i32 3 + %i6 = load i32* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds i32* %b, i32 3 + %i7 = load i32* %arrayidx7, align 4 + %add4 = add i32 %i6, %i7 + %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone + + store i32 %call1, i32* %c, align 4 + %arrayidx8 = getelementptr inbounds i32* %c, i32 1 + store i32 %call2, i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %c, i32 2 + store i32 %call3, i32* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds i32* %c, i32 3 + store i32 %call4, i32* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_ctlz_i32( +; CHECK: load <4 x i32> +; CHECK: load <4 x i32> +; CHECK: call <4 x i32> @llvm.ctlz.v4i32 +; CHECK: store <4 x i32> +; CHECK: ret +} + +define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) { +entry: + %i0 = load i32* %a, align 4 + %i1 = load i32* %b, align 4 + %add1 = add i32 %i0, %i1 + %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone + + %arrayidx2 = getelementptr inbounds i32* %a, i32 1 + %i2 = load i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i32 1 + %i3 = load i32* %arrayidx3, align 4 + %add2 = add i32 %i2, %i3 + %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone + + %arrayidx4 = getelementptr inbounds i32* %a, i32 2 + %i4 = load i32* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32* %b, i32 2 + %i5 = load i32* %arrayidx5, align 4 + %add3 = add i32 %i4, %i5 + %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone + + %arrayidx6 = getelementptr inbounds i32* %a, i32 3 + %i6 = load i32* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds i32* %b, i32 3 + %i7 = load i32* %arrayidx7, align 4 + %add4 = add i32 %i6, %i7 + %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone + + store i32 %call1, i32* %c, align 4 + %arrayidx8 = getelementptr inbounds i32* %c, i32 1 + store i32 %call2, i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %c, i32 2 + store i32 %call3, i32* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds i32* %c, i32 3 + store i32 %call4, i32* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_ctlz_i32_neg( +; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32 + +} + + +declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone + +define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) { +entry: + %i0 = load i32* %a, align 4 + %i1 = load i32* %b, align 4 + %add1 = add i32 %i0, %i1 + %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone + + %arrayidx2 = getelementptr inbounds i32* %a, i32 1 + %i2 = load i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i32 1 + %i3 = load i32* %arrayidx3, align 4 + %add2 = add i32 %i2, %i3 + %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone + + %arrayidx4 = getelementptr inbounds i32* %a, i32 2 + %i4 = load i32* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32* %b, i32 2 + %i5 = load i32* %arrayidx5, align 4 + %add3 = add i32 %i4, %i5 + %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone + + %arrayidx6 = getelementptr inbounds i32* %a, i32 3 + %i6 = load i32* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds i32* %b, i32 3 + %i7 = load i32* %arrayidx7, align 4 + %add4 = add i32 %i6, %i7 + %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone + + store i32 %call1, i32* %c, align 4 + %arrayidx8 = getelementptr inbounds i32* %c, i32 1 + store i32 %call2, i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %c, i32 2 + store i32 %call3, i32* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds i32* %c, i32 3 + store i32 %call4, i32* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_cttz_i32( +; CHECK: load <4 x i32> +; CHECK: load <4 x i32> +; CHECK: call <4 x i32> @llvm.cttz.v4i32 +; CHECK: store <4 x i32> +; CHECK: ret +} + +define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) { +entry: + %i0 = load i32* %a, align 4 + %i1 = load i32* %b, align 4 + %add1 = add i32 %i0, %i1 + %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone + + %arrayidx2 = getelementptr inbounds i32* %a, i32 1 + %i2 = load i32* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i32 1 + %i3 = load i32* %arrayidx3, align 4 + %add2 = add i32 %i2, %i3 + %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone + + %arrayidx4 = getelementptr inbounds i32* %a, i32 2 + %i4 = load i32* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32* %b, i32 2 + %i5 = load i32* %arrayidx5, align 4 + %add3 = add i32 %i4, %i5 + %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone + + %arrayidx6 = getelementptr inbounds i32* %a, i32 3 + %i6 = load i32* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds i32* %b, i32 3 + %i7 = load i32* %arrayidx7, align 4 + %add4 = add i32 %i6, %i7 + %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone + + store i32 %call1, i32* %c, align 4 + %arrayidx8 = getelementptr inbounds i32* %c, i32 1 + store i32 %call2, i32* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds i32* %c, i32 2 + store i32 %call3, i32* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds i32* %c, i32 3 + store i32 %call4, i32* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_cttz_i32_neg( +; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32 +} + + +declare float @llvm.powi.f32(float, i32) +define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) { +entry: + %i0 = load float* %a, align 4 + %i1 = load float* %b, align 4 + %add1 = fadd float %i0, %i1 + %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone + + %arrayidx2 = getelementptr inbounds float* %a, i32 1 + %i2 = load float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float* %b, i32 1 + %i3 = load float* %arrayidx3, align 4 + %add2 = fadd float %i2, %i3 + %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone + + %arrayidx4 = getelementptr inbounds float* %a, i32 2 + %i4 = load float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float* %b, i32 2 + %i5 = load float* %arrayidx5, align 4 + %add3 = fadd float %i4, %i5 + %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone + + %arrayidx6 = getelementptr inbounds float* %a, i32 3 + %i6 = load float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float* %b, i32 3 + %i7 = load float* %arrayidx7, align 4 + %add4 = fadd float %i6, %i7 + %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone + + store float %call1, float* %c, align 4 + %arrayidx8 = getelementptr inbounds float* %c, i32 1 + store float %call2, float* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds float* %c, i32 2 + store float %call3, float* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds float* %c, i32 3 + store float %call4, float* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_powi_f32( +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: call <4 x float> @llvm.powi.v4f32 +; CHECK: store <4 x float> +; CHECK: ret +} + + +define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) { +entry: + %i0 = load float* %a, align 4 + %i1 = load float* %b, align 4 + %add1 = fadd float %i0, %i1 + %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone + + %arrayidx2 = getelementptr inbounds float* %a, i32 1 + %i2 = load float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float* %b, i32 1 + %i3 = load float* %arrayidx3, align 4 + %add2 = fadd float %i2, %i3 + %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone + + %arrayidx4 = getelementptr inbounds float* %a, i32 2 + %i4 = load float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float* %b, i32 2 + %i5 = load float* %arrayidx5, align 4 + %add3 = fadd float %i4, %i5 + %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone + + %arrayidx6 = getelementptr inbounds float* %a, i32 3 + %i6 = load float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float* %b, i32 3 + %i7 = load float* %arrayidx7, align 4 + %add4 = fadd float %i6, %i7 + %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone + + store float %call1, float* %c, align 4 + %arrayidx8 = getelementptr inbounds float* %c, i32 1 + store float %call2, float* %arrayidx8, align 4 + %arrayidx9 = getelementptr inbounds float* %c, i32 2 + store float %call3, float* %arrayidx9, align 4 + %arrayidx10 = getelementptr inbounds float* %c, i32 3 + store float %call4, float* %arrayidx10, align 4 + ret void + +; CHECK-LABEL: @vec_powi_f32_neg( +; CHECK-NOT: call <4 x float> @llvm.powi.v4f32 +} -- 2.34.1