[X86][SSE] Add INSERTPS as a target shuffle

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a2591a3cfd3ae4afd8f5f214efe068f38c5a4d4e..d32146b32cf63baa5ddb697fad1bd745cef06590 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3907,6 +3907,7 @@ static bool isTargetShuffle(unsigned Opcode) {
    case X86ISD::PSHUFHW:
    case X86ISD::PSHUFLW:
    case X86ISD::SHUFP:
+  case X86ISD::INSERTPS:
    case X86ISD::PALIGNR:
    case X86ISD::MOVLHPS:
    case X86ISD::MOVLHPD:
@@ -4760,6 +4761,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
      DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
      IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
      break;
+  case X86ISD::INSERTPS:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+    break;
    case X86ISD::UNPCKH:
      DecodeUNPCKHMask(VT, Mask);
      IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -23860,6 +23866,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
  
    SDValue InVec = N->getOperand(0);
    SDValue EltNo = N->getOperand(1);
+  EVT EltVT = N->getValueType(0);
  
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
@@ -23888,14 +23895,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
  
    SmallVector<int, 16> ShuffleMask;
    bool UnaryShuffle;
-  if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
-                            false, ShuffleMask, UnaryShuffle))
+  if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
+                            ShuffleMask, UnaryShuffle))
      return SDValue();
  
    // Select the input vector, guarding against out of range extract vector.
    unsigned NumElems = CurrentVT.getVectorNumElements();
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+
+  if (Idx == SM_SentinelZero)
+    return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
+                             : DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
+  if (Idx == SM_SentinelUndef)
+    return DAG.getUNDEF(EltVT);
+
+  assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
    SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
                                           : InVec.getOperand(1);
  
@@ -23920,7 +23935,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
    if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
      return SDValue();
  
-  EVT EltVT = N->getValueType(0);
    // If there's a bitcast before the shuffle, check if the load type and
    // alignment is valid.
    unsigned Align = LN0->getAlignment();
diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll

index 655f8f49f83811da8e514c38feb35ee92b25b95c..f2596b6347b90a2f212e3851c28da8f45159e888 100644 (file)
--- a/test/CodeGen/X86/insertps-combine.ll
+++ b/test/CodeGen/X86/insertps-combine.ll
@@ -109,3 +109,36 @@ define <4 x float> @shuffle_v4f32_0z6z(<4 x float> %A, <4 x float> %B) {
    %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
    ret <4 x float> %vecinit4
  }
+
+define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: extract_zero_insertps_z0z7:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extract_zero_insertps_z0z7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21)
+  %ext = extractelement <4 x float> %res, i32 0
+  ret float %ext
+}
+
+define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
+; SSE-LABEL: extract_lane_insertps_5123:
+; SSE:       # BB#0:
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extract_lane_insertps_5123:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %a1 = load <4 x float>, <4 x float> *%p1
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
+  %ext = extractelement <4 x float> %res, i32 0
+  ret float %ext
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 7 Jan 2016 10:24:19 +0000 (10:24 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/insertps-combine.ll		patch | blob | history