From: Michael Liao Date: Wed, 10 Oct 2012 16:32:15 +0000 (+0000) Subject: Add alternative support for FP_ROUND from v2f32 to v2f64 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9d796db3e746c31dbdb605510c53b3da98d71b38;p=oota-llvm.git Add alternative support for FP_ROUND from v2f32 to v2f64 - Due to the current matching vector elements constraints in ISD::FP_EXTEND, rounding from v2f32 to v2f64 is scalarized. Add a customized v2f32 widening to convert it into a target-specific X86ISD::VFPEXT to work around this constraints. This patch also reverts a previous attempt to fix this issue by recovering the scalarized ISD::FP_EXTEND pattern and thus significantly reduces the overhead of supporting non-power-2 vector FP extend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165625 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 37f0e60087d..20b7ce6b15b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -634,7 +634,7 @@ private: SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. - bool WidenVectorOperand(SDNode *N, unsigned ResNo); + bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index eca4d99098a..d51a6eb192e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2082,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom widen this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + switch (N->getOpcode()) { default: #ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e43e5432d8..d551d9d0275 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -939,6 +939,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } @@ -5161,86 +5163,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64 -// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the -// constraint of matching input/output vector elements. -SDValue -X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - SDNode *N = Op.getNode(); - EVT VT = Op.getValueType(); - unsigned NumElts = Op.getNumOperands(); - - // Check supported types and sub-targets. - // - // Only v2f32 -> v2f64 needs special handling. - if (VT != MVT::v2f64 || !Subtarget->hasSSE2()) - return SDValue(); - - SDValue VecIn; - EVT VecInVT; - SmallVector Mask; - EVT SrcVT = MVT::Other; - - // Check the patterns could be translated into X86vfpext. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue In = N->getOperand(i); - unsigned Opcode = In.getOpcode(); - - // Skip if the element is undefined. - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - // Quit if one of the elements is not defined from 'fpext'. - if (Opcode != ISD::FP_EXTEND) - return SDValue(); - - // Check how the source of 'fpext' is defined. - SDValue L2In = In.getOperand(0); - EVT L2InVT = L2In.getValueType(); - - // Check the original type - if (SrcVT == MVT::Other) - SrcVT = L2InVT; - else if (SrcVT != L2InVT) // Quit if non-homogenous typed. - return SDValue(); - - // Check whether the value being 'fpext'ed is extracted from the same - // source. - Opcode = L2In.getOpcode(); - - // Quit if it's not extracted with a constant index. - if (Opcode != ISD::EXTRACT_VECTOR_ELT || - !isa(L2In.getOperand(1))) - return SDValue(); - - SDValue ExtractedFromVec = L2In.getOperand(0); - - if (VecIn.getNode() == 0) { - VecIn = ExtractedFromVec; - VecInVT = ExtractedFromVec.getValueType(); - } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec. - return SDValue(); - - Mask.push_back(cast(L2In.getOperand(1))->getZExtValue()); - } - - // Quit if all operands of BUILD_VECTOR are undefined. - if (!VecIn.getNode()) - return SDValue(); - - // Fill the remaining mask as undef. - for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) - Mask.push_back(-1); - - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getVectorShuffle(VecInVT, DL, - VecIn, DAG.getUNDEF(VecInVT), - &Mask[0])); -} - SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5273,10 +5195,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; - SDValue FpExt = LowerVectorFpExtend(Op, DAG); - if (FpExt.getNode()) - return FpExt; - unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -8215,6 +8133,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } +SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, + In, DAG.getUNDEF(SVT))); +} + SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); @@ -11407,6 +11339,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1cae7ed2681..dca65b895e2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -788,6 +788,7 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; @@ -818,8 +819,6 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,