From: Andrea Di Biagio Date: Thu, 4 Jun 2015 19:15:01 +0000 (+0000) Subject: [DAGCombiner] Fix wrong folding of a build_vector into a blend with zero. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=c07ee0c4ff45f4a5661cce9ed8e3e799a30bd952;p=oota-llvm.git [DAGCombiner] Fix wrong folding of a build_vector into a blend with zero. Method 'visitBUILD_VECTOR' in the DAGCombiner knows how to combine a build_vector of a bunch of extract_vector_elt nodes and constant zero nodes into a shuffle blend with a zero vector. However, method 'visitBUILD_VECTOR' forgot that a floating point build_vector may contain negative zero as well as positive zero. Example: define <2 x double> @example(<2 x double> %A) { entry: %0 = extractelement <2 x double> %A, i32 0 %1 = insertelement <2 x double> undef, double %0, i32 0 %2 = insertelement <2 x double> %1, double -0.0, i32 1 ret <2 x double> %2 } Before this patch, llc (with -mattr=+sse4.1) wrongly generated movq %xmm0, %xmm0 # xmm0 = xmm0[0],zero So, the sign bit of the negative zero was effectively lost. This patch fixes the problem by adding explicit checks for positive zero. With this patch, llc produces the following code for the example above: movhpd .LCPI0_0(%rip), %xmm0 where .LCPI0_0 refers to a 'double -0'. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239070 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 439ddb292e4..e9cba08d333 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1587,6 +1587,11 @@ static bool isNullConstant(SDValue V) { return Const != nullptr && Const->isNullValue(); } +static bool isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + static bool isAllOnesConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); return Const != nullptr && Const->isAllOnesValue(); @@ -11912,9 +11917,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && (isNullConstant(Op) || - (Op.getOpcode() == ISD::ConstantFP && - cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { UsesZeroVector = true; continue; } diff --git a/test/CodeGen/X86/fold-buildvector-bug.ll b/test/CodeGen/X86/fold-buildvector-bug.ll new file mode 100644 index 00000000000..76deab7edae --- /dev/null +++ b/test/CodeGen/X86/fold-buildvector-bug.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s + +; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a +; blend with a zero vector if the build_vector contains negative zero. +; +; TODO: the codegen for function 'test_negative_zero_1' is sub-optimal. +; Ideally, we should generate a single shuffle blend operation. 
+ +define <4 x float> @test_negative_zero_1(<4 x float> %A) { +; CHECK-LABEL: test_negative_zero_1: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0] +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: blendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: retq +entry: + %0 = extractelement <4 x float> %A, i32 0 + %1 = insertelement <4 x float> undef, float %0, i32 0 + %2 = insertelement <4 x float> %1, float -0.0, i32 1 + %3 = extractelement <4 x float> %A, i32 2 + %4 = insertelement <4 x float> %2, float %3, i32 2 + %5 = insertelement <4 x float> %4, float 0.0, i32 3 + ret <4 x float> %5 +} + +define <2 x double> @test_negative_zero_2(<2 x double> %A) { +; CHECK-LABEL: test_negative_zero_2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movhpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq +entry: + %0 = extractelement <2 x double> %A, i32 0 + %1 = insertelement <2 x double> undef, double %0, i32 0 + %2 = insertelement <2 x double> %1, double -0.0, i32 1 + ret <2 x double> %2 +}