From 9e0a1565b9fb2cee34cd82ea33fc29261801a414 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 21 Apr 2015 17:21:36 +0000 Subject: [PATCH 1/1] X86: Match for X86ISD nodes in LowerBUILD_VECTOR instead of BUILD_VECTORCombine There doesn't seem to be a reason to perform this target ISD node matching in a DAGCombine; moving it to lowering fixes PR23296. Differential Revision: http://reviews.llvm.org/D9137 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++++--------------- test/CodeGen/X86/haddsub.ll | 15 ++++++++++ 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1271d8d2aca..66b92283da8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5210,7 +5210,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { /// \brief Return true if \p N implements a horizontal binop and return the /// operands for the horizontal binop into V0 and V1. /// -/// This is a helper function of PerformBUILD_VECTORCombine. +/// This is a helper function of LowerToHorizontalOp(). /// This function checks that the build_vector \p N in input implements a /// horizontal operation. Parameter \p Opcode defines the kind of horizontal /// operation to match. @@ -5307,7 +5307,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by /// a concat_vector. /// -/// This is a helper function of PerformBUILD_VECTORCombine. +/// This is a helper function of LowerToHorizontalOp(). /// This function expects two 256-bit vectors called V0 and V1. /// At first, each vector is split into two separate 128-bit vectors. 
/// Then, the resulting 128-bit vectors are used to implement two @@ -5373,12 +5373,16 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); } -/// \brief Try to fold a build_vector that performs an 'addsub' into the -/// sequence of 'vadd + vsub + blendi'. -static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc DL(BV); +/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB +/// node. +static SDValue LowerToAddSub(const BuildVectorSDNode *BV, + const X86Subtarget *Subtarget, SelectionDAG &DAG) { EVT VT = BV->getValueType(0); + if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + return SDValue(); + + SDLoc DL(BV); unsigned NumElts = VT.getVectorNumElements(); SDValue InVec0 = DAG.getUNDEF(VT); SDValue InVec1 = DAG.getUNDEF(VT); @@ -5472,23 +5476,12 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc DL(N); - EVT VT = N->getValueType(0); +/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. +static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + EVT VT = BV->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); - BuildVectorSDNode *BV = cast(N); - SDValue InVec0, InVec1; - - // Try to match an ADDSUB. - if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || - (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { - SDValue Value = matchAddSub(BV, DAG, Subtarget); - if (Value.getNode()) - return Value; - } - - // Try to match horizontal ADD/SUB. 
unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; unsigned Half = NumElts/2; @@ -5507,6 +5500,8 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) return SDValue(); + SDLoc DL(BV); + SDValue InVec0, InVec1; if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) @@ -5651,6 +5646,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); } + BuildVectorSDNode *BV = cast(Op.getNode()); + if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG)) + return AddSub; + if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) + return HorizontalOp; if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG)) return Broadcast; @@ -23927,7 +23927,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, break; } case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget); } return SDValue(); diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll index 9feb5f6ea6e..6e65c6c739c 100644 --- a/test/CodeGen/X86/haddsub.ll +++ b/test/CodeGen/X86/haddsub.ll @@ -283,3 +283,18 @@ define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) { %r = fsub <4 x double> %a, %b ret <4 x double> %r } + +; CHECK-LABEL: haddps_v2f32 +; CHECK: haddps %xmm{{[0-9]+}}, %xmm0 +; CHECK-NEXT: retq +define <2 x float> @haddps_v2f32(<4 x float> %v0) { + %v0.0 = extractelement <4 x float> %v0, i32 0 + %v0.1 = extractelement <4 x float> %v0, i32 1 + %v0.2 = extractelement <4 x float> %v0, i32 2 + %v0.3 = extractelement <4 x float> %v0, i32 3 + %op0 = fadd float %v0.0, %v0.1 + %op1 = fadd float %v0.2, %v0.3 + %res0 = insertelement <2 x float> undef, float %op0, i32 0 + %res1 = insertelement <2 x float> %res0, float %op1, 
i32 1 + ret <2 x float> %res1 +} -- 2.34.1