x86: Move bitcasts outside concat_vector.

author Jim Grosbach <grosbach@apple.com>

Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)

committer Jim Grosbach <grosbach@apple.com>

Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)
author Jim Grosbach <grosbach@apple.com>
Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)
committer Jim Grosbach <grosbach@apple.com>
Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 54d824419fd8a6da8d51bdcbe081ae9c0b736bda..4b11f2b61f3dc36dc9b54f64dcdc68e09ed4e472 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1498,6 +1498,7 @@ void X86TargetLowering::resetOperationActions() {
    }
  
    // We have target-specific dag combine patterns for the following nodes:
+  setTargetDAGCombine(ISD::CONCAT_VECTORS);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
    setTargetDAGCombine(ISD::VSELECT);
@@ -16151,6 +16152,44 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
+static SDValue PerformConcatCombine(SDNode *N, SelectionDAG &DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const X86Subtarget *Subtarget) {
+  // Rewrite (concat_vectors (bitcast X), undef) -> (bitcast (scalar_to_vector
+  // X)) for 128-bit results where the bitcast type is illegal but X's type is
+  // legal. Creating e.g. a v8i16 from a v4i16 argument and an undef otherwise
+  // runs into trouble in type legalization and ends up spilling to the stack;
+  // v8i8 has similar problems. Returns an empty SDValue if nothing changes.
+
+  // No point in doing this after legalize, so early exit for that.
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (VT.getSizeInBits() == 128 && N->getNumOperands() == 2 &&
+      Op1->getOpcode() == ISD::UNDEF &&
+      Op0->getOpcode() == ISD::BITCAST &&
+      !TLI.isTypeLegal(Op0->getValueType(0)) &&
+      TLI.isTypeLegal(Op0->getOperand(0)->getValueType(0))) {
+    SDValue Scalar = Op0->getOperand(0);
+    // Any legal type is a simple value type, so getSimpleVT() is safe here.
+    MVT SVT = Scalar->getValueType(0).getSimpleVT();
+    // As a special case, bail out on MMX values (x86mmx) and leave them as-is.
+    if (SVT == MVT::x86mmx)
+      return SDValue();
+    EVT NVT = MVT::getVectorVT(SVT, 2);
+    SDLoc dl = SDLoc(N);
+    SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+    return Res;
+  }
+
+  return SDValue();
+}
+
  /// PerformShuffleCombine - Performs several different shuffle combines.
  static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
@@ -19029,6 +19068,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::VPERMILP:
    case X86ISD::VPERM2X128:
    case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
+  case ISD::CONCAT_VECTORS: return PerformConcatCombine(N, DAG, DCI, Subtarget);
    case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
    }
  
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll

index b85b4c39ea8eeacf26b5d866f7bbc7a85cb9b4ef..f0e468f53cb333cc6467abf1dea1deeeb0b0e8b4 100644 (file)
--- a/test/CodeGen/X86/pmovext.ll
+++ b/test/CodeGen/X86/pmovext.ll
@@ -18,5 +18,28 @@ define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable
  }
  
  declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-
  declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+; rdar://15245794: a bitcast double widened with undef lanes feeds pmovzx directly.
+
+define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo0
+; CHECK: pmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <4 x i16>
+  %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
+  ret <4 x i32> %tmp2
+}
+
+define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo1
+; CHECK: pmovzxbw %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <8 x i8>
+  %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
author	Jim Grosbach <grosbach@apple.com>
	Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)
committer	Jim Grosbach <grosbach@apple.com>
	Thu, 17 Oct 2013 02:58:06 +0000 (02:58 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/pmovext.ll		patch \| blob \| history