From 2234dddb07dfaf9ff292ee96efb28bc82a91b66f Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 11 Jan 2016 22:34:19 +0000 Subject: [PATCH] [LibCallSimplifier] use instruction-level fast-math-flags to transform sqrt calls This is a continuation of adding FMF to call instructions: http://reviews.llvm.org/rL255555 The intent of the patch is to preserve the current behavior of the transform except that we use the sqrt instruction's 'fast' attribute as a trigger rather than the function-level attribute. But this raises a bug noted by the new FIXME comment. In order to do this transform: sqrt((x * x) * y) ---> fabs(x) * sqrt(y) ...we need all of the sqrt, the first fmul, and the second fmul to be 'fast'. If any of those ops is strict, we should bail out. Differential Revision: http://reviews.llvm.org/D15937 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257400 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 8 +-- test/Transforms/InstCombine/fast-math.ll | 54 +++++++++---------- .../InstCombine/inline-intrinsic-assert.ll | 12 ++--- .../Transforms/InstCombine/no_cgscc_assert.ll | 5 +- 4 files changed, 35 insertions(+), 44 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index edf0db467ae..cf87ac1cf28 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1397,7 +1397,8 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + + if (!CI->hasUnsafeAlgebra()) return Ret; Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); @@ -1406,7 +1407,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // We're looking for a repeated factor in a multiplication tree, // so we can 
do this fold: sqrt(x * x) -> fabs(x); - // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y). + // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y). Value *Op0 = I->getOperand(0); Value *Op1 = I->getOperand(1); Value *RepeatOp = nullptr; @@ -1421,6 +1422,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // variations of this pattern because instcombine's visitFMUL and/or the // reassociation pass should give us this form. Value *OtherMul0, *OtherMul1; + // FIXME: This multiply must be unsafe to allow this transform. if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { // Pattern: sqrt((x * y) * z) if (OtherMul0 == OtherMul1) { @@ -1435,8 +1437,6 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // Fast math flags for any created instructions should match the sqrt // and multiply. - // FIXME: We're not checking the sqrt because it doesn't have - // fast-math-flags (see earlier comment). IRBuilder<>::FastMathFlagGuard Guard(B); B.SetFastMathFlags(I->getFastMathFlags()); // If we found a repeated factor, hoist it out of the square root and diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index 5bdf48b85ce..b24ce278ca8 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -555,18 +555,12 @@ define float @fact_div6(float %x) { ; A squared factor fed into a square root intrinsic should be hoisted out ; as a fabs() value. -; We have to rely on a function-level attribute to enable this optimization -; because intrinsics don't currently have access to IR-level fast-math -; flags. If that changes, we can relax the requirement on all of these -; tests to just specify 'fast' on the sqrt. 
- -attributes #0 = { "unsafe-fp-math" = "true" } declare double @llvm.sqrt.f64(double) -define double @sqrt_intrinsic_arg_squared(double %x) #0 { +define double @sqrt_intrinsic_arg_squared(double %x) { %mul = fmul fast double %x, %x - %sqrt = call double @llvm.sqrt.f64(double %mul) + %sqrt = call fast double @llvm.sqrt.f64(double %mul) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_squared( @@ -577,10 +571,10 @@ define double @sqrt_intrinsic_arg_squared(double %x) #0 { ; Check all 6 combinations of a 3-way multiplication tree where ; one factor is repeated. -define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args1(double %x, double %y) { %mul = fmul fast double %y, %x %mul2 = fmul fast double %mul, %x - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args1( @@ -590,10 +584,10 @@ define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args2(double %x, double %y) { %mul = fmul fast double %x, %y %mul2 = fmul fast double %mul, %x - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args2( @@ -603,10 +597,10 @@ define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args3(double %x, double %y) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %y - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args3( @@ -616,10 +610,10 @@ define double @sqrt_intrinsic_three_args3(double %x, double 
%y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args4(double %x, double %y) { %mul = fmul fast double %y, %x %mul2 = fmul fast double %x, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args4( @@ -629,10 +623,10 @@ define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args5(double %x, double %y) { %mul = fmul fast double %x, %y %mul2 = fmul fast double %x, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args5( @@ -642,10 +636,10 @@ define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 { +define double @sqrt_intrinsic_three_args6(double %x, double %y) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %y, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_three_args6( @@ -655,10 +649,10 @@ define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 { ; CHECK-NEXT: ret double %1 } -define double @sqrt_intrinsic_arg_4th(double %x) #0 { +define double @sqrt_intrinsic_arg_4th(double %x) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul2) + %sqrt = call fast double @llvm.sqrt.f64(double %mul2) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_4th( @@ -666,11 +660,11 @@ define double @sqrt_intrinsic_arg_4th(double %x) #0 { ; CHECK-NEXT: ret double %mul } -define double @sqrt_intrinsic_arg_5th(double 
%x) #0 { +define double @sqrt_intrinsic_arg_5th(double %x) { %mul = fmul fast double %x, %x %mul2 = fmul fast double %mul, %x %mul3 = fmul fast double %mul2, %mul - %sqrt = call double @llvm.sqrt.f64(double %mul3) + %sqrt = call fast double @llvm.sqrt.f64(double %mul3) ret double %sqrt ; CHECK-LABEL: sqrt_intrinsic_arg_5th( @@ -686,9 +680,9 @@ declare float @sqrtf(float) declare double @sqrt(double) declare fp128 @sqrtl(fp128) -define float @sqrt_call_squared_f32(float %x) #0 { +define float @sqrt_call_squared_f32(float %x) { %mul = fmul fast float %x, %x - %sqrt = call float @sqrtf(float %mul) + %sqrt = call fast float @sqrtf(float %mul) ret float %sqrt ; CHECK-LABEL: sqrt_call_squared_f32( @@ -696,9 +690,9 @@ define float @sqrt_call_squared_f32(float %x) #0 { ; CHECK-NEXT: ret float %fabs } -define double @sqrt_call_squared_f64(double %x) #0 { +define double @sqrt_call_squared_f64(double %x) { %mul = fmul fast double %x, %x - %sqrt = call double @sqrt(double %mul) + %sqrt = call fast double @sqrt(double %mul) ret double %sqrt ; CHECK-LABEL: sqrt_call_squared_f64( @@ -706,9 +700,9 @@ define double @sqrt_call_squared_f64(double %x) #0 { ; CHECK-NEXT: ret double %fabs } -define fp128 @sqrt_call_squared_f128(fp128 %x) #0 { +define fp128 @sqrt_call_squared_f128(fp128 %x) { %mul = fmul fast fp128 %x, %x - %sqrt = call fp128 @sqrtl(fp128 %mul) + %sqrt = call fast fp128 @sqrtl(fp128 %mul) ret fp128 %sqrt ; CHECK-LABEL: sqrt_call_squared_f128( diff --git a/test/Transforms/InstCombine/inline-intrinsic-assert.ll b/test/Transforms/InstCombine/inline-intrinsic-assert.ll index c6446d43cff..8eecb3fd40a 100644 --- a/test/Transforms/InstCombine/inline-intrinsic-assert.ll +++ b/test/Transforms/InstCombine/inline-intrinsic-assert.ll @@ -4,7 +4,7 @@ ; The inliner should not add an edge to an intrinsic and ; then assert that it did not add an edge to an intrinsic! 
-define float @foo(float %f1) #0 { +define float @foo(float %f1) { %call = call float @bar(float %f1) ret float %call @@ -13,18 +13,16 @@ define float @foo(float %f1) #0 { ; CHECK-NEXT: ret float } -define float @bar(float %f1) #0 { +define float @bar(float %f1) { %call = call float @sqr(float %f1) - %call1 = call float @sqrtf(float %call) #0 + %call1 = call fast float @sqrtf(float %call) ret float %call1 } -define float @sqr(float %f) #0 { +define float @sqr(float %f) { %mul = fmul fast float %f, %f ret float %mul } -declare float @sqrtf(float) #0 - -attributes #0 = { "unsafe-fp-math"="true" } +declare float @sqrtf(float) diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll index 3df04d2c890..677066fa2ab 100644 --- a/test/Transforms/InstCombine/no_cgscc_assert.ll +++ b/test/Transforms/InstCombine/no_cgscc_assert.ll @@ -6,7 +6,7 @@ define float @bar(float %f) #0 { %mul = fmul fast float %f, %f - %call1 = call float @sqrtf(float %mul) #0 + %call1 = call fast float @sqrtf(float %mul) ret float %call1 ; CHECK-LABEL: @bar( @@ -14,6 +14,5 @@ define float @bar(float %f) #0 { ; CHECK-NEXT: ret float } -declare float @sqrtf(float) #0 +declare float @sqrtf(float) -attributes #0 = { readnone "unsafe-fp-math"="true" } -- 2.34.1