From fa6cf7e73c9678e3aa50a5d8499afdd26bac831c Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 15 Sep 2014 20:09:47 +0000 Subject: [PATCH] [x86] Start fixing our emission of ADDSUBPS and ADDSUBPD instructions by introducing a synthetic X86 ISD node representing this generic operation. The relevant patterns for mapping these nodes into the concrete instructions are also added, and a gnarly bit of C++ code in the target-specific DAG combiner is replaced with simple code emitting this primitive. The next step is to generically combine blends of adds and subs into this node so that we can drop the reliance on an SSE4.1 ISD node (BLENDI) when matching an SSE3 feature (ADDSUB). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 30 ++++--------------------- lib/Target/X86/X86ISelLowering.h | 3 +++ lib/Target/X86/X86InstrFragmentsSIMD.td | 3 +++ lib/Target/X86/X86InstrSSE.td | 27 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b5744e093d2..166719b854f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6501,14 +6501,14 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, for (unsigned i = 0, e = NumElts; i != e; i++) { SDValue Op = BV->getOperand(i); - + // Skip 'undef' values. unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::UNDEF) { std::swap(ExpectedOpcode, NextExpectedOpcode); continue; } - + // Early exit if we found an unexpected opcode. if (Opcode != ExpectedOpcode) return SDValue(); @@ -6565,31 +6565,9 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, // Don't try to fold this build_vector into a VSELECT if it has // too many UNDEF operands. if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF && - InVec1.getOpcode() != ISD::UNDEF) { - // Emit a sequence of vector add and sub followed by a VSELECT. - // The new VSELECT will be lowered into a BLENDI. - // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI' - // and emit a single ADDSUB instruction. - SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1); - SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1); - - // Construct the VSELECT mask. - EVT MaskVT = VT.changeVectorElementTypeToInteger(); - EVT SVT = MaskVT.getVectorElementType(); - unsigned SVTBits = SVT.getSizeInBits(); - SmallVector Ops; - - for (unsigned i = 0, e = NumElts; i != e; ++i) { - APInt Value = i & 1 ? APInt::getNullValue(SVTBits) : - APInt::getAllOnesValue(SVTBits); - SDValue Constant = DAG.getConstant(Value, SVT); - Ops.push_back(Constant); - } + InVec1.getOpcode() != ISD::UNDEF) + return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops); - return DAG.getSelect(DL, VT, Mask, Sub, Add); - } - return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 90d78369721..fef3d74b2d0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -193,6 +193,9 @@ namespace llvm { /// BLENDI - Blend where the selector is an immediate. BLENDI, + /// ADDSUB - Combined add and sub on an FP vector. + ADDSUB, + // SUBUS - Integer sub with unsigned saturation. SUBUS, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index eba5ec25136..3b777d2407c 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -248,6 +248,9 @@ def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; + +def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; + def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 71cc77214aa..54eb6f7d4c0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5370,6 +5370,24 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { // Patterns used to select 'addsub' instructions. let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), + (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))), + (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; + def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), + (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; + def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))), + (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; + + def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))), + (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; + def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))), + (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>; + def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))), + (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; + def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))), + (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>; + // Constant 170 corresponds to the binary mask '10101010'. // When used as a blend mask, it allows selecting eight elements from two // input vectors as follow: @@ -5405,6 +5423,15 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE3] in { + def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), + (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; + def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))), + (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; + def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), + (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; + def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))), + (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; + // Constant 10 corresponds to the binary mask '1010'. // In the pattern below, it is used as a blend mask to select: // - the 1st and 3rd element from the first input vector (the fsub node); -- 2.34.1