bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumElem,
bool IsConstantSrc, bool UseVector);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
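(Illustration only, not part of the patch: the source-level effect of this combine, assuming a little-endian target where an aligned i32 store is legal; function names are made up.)

  // Before: four adjacent stores of constant bytes.
  void init(unsigned char *p) {
    p[0] = 0x11; p[1] = 0x22; p[2] = 0x33; p[3] = 0x44;
  }

  // After merging: one 32-bit store of the packed constant 0x44332211
  // (little-endian byte order). memcpy stands in for the single
  // aligned i32 store the combiner emits.
  void init_merged(unsigned char *p) {
    unsigned v = 0x44332211;
    __builtin_memcpy(p, &v, sizeof v);
  }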
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
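(Aside: a minimal sketch of the address arithmetic shared by the two split paths above, whose surrounding context is elided from these hunks. IncrementSize is the byte size of the low half, and the getConstant call matches the 3.x-era API already visible in the hunks.)

  // Advance the base pointer past the low half so the second
  // masked load/store covers the high elements.
  unsigned IncrementSize = LoMemVT.getSizeInBits() / 8;
  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                    DAG.getConstant(IncrementSize, Ptr.getValueType()));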
// Make sure we have something to merge.
if (NumElem < 2)
return false;
-
+
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned EarliestNodeUsed = 0;
-
+
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
// earliest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
-
+
// The earliest Node in the DAG.
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
-
+
SDValue StoredVal;
if (UseVector) {
// Find a legal type for the vector store.
return false;
Ops.push_back(Val);
}
-
+
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
}
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
APInt StoreInt(StoreBW, 0);
-
+
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = TLI.isLittleEndian();
llvm_unreachable("Invalid constant element type");
}
}
-
+
// Create the new Load and Store operations.
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
StoredVal = DAG.getConstant(StoreInt, StoreTy);
}
-
+
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
-
+
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
-
+
return true;
}
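(Illustration only, not part of the patch: a self-contained model of the little-endian constant-packing step above. The helper name and the byte-sized elements are assumptions; the real code uses APInt and handles wider element types.)

  #include <cstdint>

  // Pack NumElem i8 constants into one wide integer so a single store
  // writes the same bytes the original narrow stores would have.
  // Little-endian: element i occupies bits [8*i, 8*i + 8).
  uint64_t packConstantsLE(const uint8_t *Elts, unsigned NumElem) {
    uint64_t StoreInt = 0;
    for (unsigned i = 0; i != NumElem; ++i)
      StoreInt |= uint64_t(Elts[i]) << (8 * i);
    return StoreInt; // {0x11, 0x22, 0x33, 0x44} -> 0x44332211
  }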
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
+
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// consecutive loads).
if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
-
+
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(Ty))
// Just because the floating-point vector type is legal does not necessarily
// mean that the corresponding integer vector type is.
if (!isTypeLegal(NVT))
- return SDValue();
+ return SDValue();
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
-
+
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF-only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If the BUILD_VECTOR nodes are built from integers, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
}
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
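(Example of what the generalized fold now catches, in DAG notation with hypothetical types; the old code required exactly two BUILD_VECTOR operands:)

  (v8i16 concat_vectors (v4i16 build_vector A, B, C, D), (v4i16 undef))
    -> (v8i16 build_vector A, B, C, D, undef, undef, undef, undef)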
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
-
+
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
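(A concrete instance of the first pattern, with hypothetical v4i32 masks: for M0 = <0,1,4,5> and M1 = <1,0,3,2>, the outer shuffle never reads C, so the pair folds to shuffle(B, A, <5,4,1,0>).)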
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: # kill
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>