From: Simon Pilgrim Date: Sat, 12 Sep 2015 16:03:06 +0000 (+0000) Subject: [X86] Moved lowerVectorShuffleWithUNPCK earlier to make reuse easier. NFCI. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9114b92030fd934999b21912f16504ad33c353b5;p=oota-llvm.git [X86] Moved lowerVectorShuffleWithUNPCK earlier to make reuse easier. NFCI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247511 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c338a4db23a..415ec312793 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6670,6 +6670,52 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, return Zeroable; } +// X86 has dedicated unpack instructions that can handle specific blend +// operations: UNPCKH and UNPCKL. +static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef Mask, + SDValue V1, SDValue V2, + SelectionDAG &DAG) { + int NumElts = VT.getVectorNumElements(); + bool Unpckl = true; + bool Unpckh = true; + bool UnpcklSwapped = true; + bool UnpckhSwapped = true; + int NumEltsInLane = 128 / VT.getScalarSizeInBits(); + + for (int i = 0; i < NumElts; ++i) { + unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; + + int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2); + int HiPos = LoPos + NumEltsInLane / 2; + int LoPosSwapped = (LoPos + NumElts) % (NumElts * 2); + int HiPosSwapped = (HiPos + NumElts) % (NumElts * 2); + + if (Mask[i] == -1) + continue; + if (Mask[i] != LoPos) + Unpckl = false; + if (Mask[i] != HiPos) + Unpckh = false; + if (Mask[i] != LoPosSwapped) + UnpcklSwapped = false; + if (Mask[i] != HiPosSwapped) + UnpckhSwapped = false; + if (!Unpckl && !Unpckh && !UnpcklSwapped && !UnpckhSwapped) + return SDValue(); + } + if (Unpckl) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + if (Unpckh) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); + if (UnpcklSwapped) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); + if (UnpckhSwapped) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); + + llvm_unreachable("Unexpected result of UNPCK mask analysis"); + return SDValue(); +} + /// \brief Try to emit a bitmask instruction for a shuffle. /// /// This handles cases where we can model a blend exactly as a bitmask due to @@ -10586,52 +10632,6 @@ static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); } -// X86 has dedicated unpack instructions that can handle specific blend -// operations: UNPCKH and UNPCKL. -static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - int NumElts = VT.getVectorNumElements(); - bool Unpckl = true; - bool Unpckh = true; - bool UnpcklSwapped = true; - bool UnpckhSwapped = true; - int NumEltsInLane = 128 / VT.getScalarSizeInBits(); - - for (int i = 0; i < NumElts ; ++i) { - unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; - - int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2); - int HiPos = LoPos + NumEltsInLane / 2; - int LoPosSwapped = (LoPos + NumElts) % (NumElts * 2); - int HiPosSwapped = (HiPos + NumElts) % (NumElts * 2); - - if (Mask[i] == -1) - continue; - if (Mask[i] != LoPos) - Unpckl = false; - if (Mask[i] != HiPos) - Unpckh = false; - if (Mask[i] != LoPosSwapped) - UnpcklSwapped = false; - if (Mask[i] != HiPosSwapped) - UnpckhSwapped = false; - if (!Unpckl && !Unpckh && !UnpcklSwapped && !UnpckhSwapped) - return SDValue(); - } - if (Unpckl) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); - if (Unpckh) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - if (UnpcklSwapped) - return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); - if (UnpckhSwapped) - return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); - - llvm_unreachable("Unexpected result of UNPCK mask analysis"); - return SDValue(); -} - /// \brief Handle lowering of 8-lane 64-bit floating point shuffles. static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, @@ -10643,10 +10643,9 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - SDValue UnpckNode = - lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG); - if (UnpckNode) - return UnpckNode; + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) + return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); } @@ -10662,10 +10661,9 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - SDValue UnpckNode = - lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG); - if (UnpckNode) - return UnpckNode; + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) + return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } @@ -10681,10 +10679,9 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - SDValue UnpckNode = - lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG); - if (UnpckNode) - return UnpckNode; + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } @@ -10700,10 +10697,9 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - SDValue UnpckNode = - lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG); - if (UnpckNode) - return UnpckNode; + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) + return Unpck; return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } @@ -11474,7 +11470,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, // If needed, look through a bitcast to get to the load. if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST) SubVec2 = SubVec2.getOperand(0); - + if (auto *FirstLd = dyn_cast(SubVec2)) { bool Fast; unsigned Alignment = FirstLd->getAlignment();