From 7c611d59cc2431b76de906733ce7ca22163e92e2 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 24 Feb 2015 19:11:00 +0000 Subject: [PATCH] Revert: Author: Simon Pilgrim Date: Mon Feb 23 23:04:28 2015 +0000 Fix based on post-commit comment on D7816 & rL230177 - BUILD_VECTOR operand truncation was using the BV's output scalar type instead of the input type. and Author: Simon Pilgrim Date: Sun Feb 22 18:17:28 2015 +0000 [DagCombiner] Generalized BuildVector Vector Concatenation The CONCAT_VECTORS combiner pass can transform the concat of two BUILD_VECTOR nodes into a single BUILD_VECTOR node. This patch generalises this to support any number of BUILD_VECTOR nodes, and also permits UNDEF nodes to be included as well. This was noticed as AVX vec128 -> vec256 canonicalization sometimes creates a CONCAT_VECTOR with a real vec128 lower and a vec128 UNDEF upper. Differential Revision: http://reviews.llvm.org/D7816 as the root cause of PR22678 which is causing an assertion inside the DAG combiner. I'll follow up to the main thread as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 62 +++++++++--------------- test/CodeGen/X86/vector-zext.ll | 33 ++++++++----- 2 files changed, 45 insertions(+), 50 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2f8850d6f16..51494d971d5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11430,52 +11430,36 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } - // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. - // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) 
- auto IsBuildVectorOrUndef = [](const SDValue &Op) { - return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); - }; - bool AllBuildVectorsOrUndefs = - std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); - if (AllBuildVectorsOrUndefs) { + if (N->getNumOperands() == 2 && + N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); SmallVector Opnds; - EVT SVT = VT.getScalarType(); - - EVT MinVT = SVT; - if (!SVT.isFloatingPoint()) + unsigned BuildVecNumElts = N0.getNumOperands(); + + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { // If BUILD_VECTOR are from built from integer, they may have different // operand types. Get the smaller type and truncate all operands to it. - for (const SDValue &Op : N->ops()) - if (ISD::BUILD_VECTOR == Op.getOpcode()) { - EVT OpSVT = Op.getOperand(0)->getValueType(0); - MinVT = MinVT.bitsLE(OpSVT) ? MinVT : OpSVT; - } - - for (const SDValue &Op : N->ops()) { - EVT OpVT = Op.getValueType(); - unsigned NumElts = OpVT.getVectorNumElements(); - - if (ISD::UNDEF == Op.getOpcode()) - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(DAG.getUNDEF(MinVT)); - - if (ISD::BUILD_VECTOR == Op.getOpcode()) { - if (SVT.isFloatingPoint()) { - assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(Op.getOperand(i)); - } else { - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back( - DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); - } - } + EVT MinTy = SclTy0.bitsLE(SclTy1) ? 
SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); } - assert(VT.getVectorNumElements() == Opnds.size() && - "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 568687dfd17..132e17fef4d 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -358,16 +358,22 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; ; AVX1-LABEL: shuf_zext_8i16_to_8i32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuf_zext_8i16_to_8i32: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: # kill -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; AVX2-NEXT: 
vpunpcklwd{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> @@ -404,17 +410,22 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; ; AVX1-LABEL: shuf_zext_4i32_to_4i64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0] +; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3] +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuf_zext_4i32_to_4i64: ; AVX2: # BB#0: # %entry ; AVX2-NEXT: # kill ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-NEXT: retq entry: %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> -- 2.34.1