This patch addresses the problem of poor code generation for the zext of <8 x i8> to <8 x i32> on X86 with AVX.

author Nadav Rotem <nadav.rotem@intel.com>

Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index d029906fc31ab443cb3a4f1d30bc464e67c61b60..9fa5572e815f7c58bed97c8fd32a5cd749fbe56c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7253,7 +7253,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
    // same source type and all of the inputs must be any or zero extend.
    // Scalar sizes must be a power of two.
    EVT OutScalarTy = N->getValueType(0).getScalarType();
-  bool validTypes = SourceType != MVT::Other &&
+  bool ValidTypes = SourceType != MVT::Other &&
                   isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                   isPowerOf2_32(SourceType.getSizeInBits());
  
@@ -7263,7 +7263,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
    // will be type-legalized to complex code sequences.
    // We perform this optimization only before the operation legalizer because we
    // may introduce illegal operations.
-  if (LegalTypes && !LegalOperations && validTypes) {
+  if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+      ValidTypes) {
      bool isLE = TLI.isLittleEndian();
      unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
      assert(ElemRatio > 1 && "Invalid element size ratio");
@@ -7322,15 +7323,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
        break;
      }
  
-    // If the input vector type disagrees with the result of the build_vector,
-    // we can't make a shuffle.
+    // We allow up to two distinct input vectors.
      SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
-    if (ExtractedFromVec.getValueType() != VT) {
-      VecIn1 = VecIn2 = SDValue(0, 0);
-      break;
-    }
-
-    // Otherwise, remember this.  We allow up to two distinct input vectors.
      if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
        continue;
  
@@ -7345,7 +7339,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      }
    }
  
-  // If everything is good, we can make a shuffle operation.
+    // If everything is good, we can make a shuffle operation.
    if (VecIn1.getNode()) {
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7371,14 +7365,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
        Mask.push_back(Idx+NumInScalars);
      }
  
-    // Add count and size info.
-    if (!isTypeLegal(VT))
+    // We can't generate a shuffle node with mismatched input and output types.
+    // Attempt to transform a single input vector to the correct type.
+    if ((VT != VecIn1.getValueType())) {
+      // We don't support shuffling between TWO values of different types.
+      if (VecIn2.getNode() != 0)
+        return SDValue();
+
+      // We only support widening of vectors which are half the size of the
+      // output registers. For example XMM->YMM widening on X86 with AVX.
+      if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+        return SDValue();
+
+      // Widen the input vector by adding undef values.
+      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                           VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+    }
+
+    // If VecIn2 is unused then change it to undef.
+    VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+    // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+    if (!isTypeLegal(VT) || !isTypeLegal(VecIn1.getValueType()) ||
+        !isTypeLegal(VecIn2.getValueType()))
        return SDValue();
  
      // Return the new VECTOR_SHUFFLE node.
      SDValue Ops[2];
      Ops[0] = VecIn1;
-    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    Ops[1] = VecIn2;
      return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
    }
  
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll

index 8b7af20b4a98332cfe0d3a11682cd5bfa20a6845..977093527609f8499a1c66ec45bdd29dddbb74b9 100644 (file)
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot          %t1.s | count 85
+; RUN: grep rot          %t1.s | count 86
  ; RUN: grep roth         %t1.s | count 8
  ; RUN: grep roti.*5      %t1.s | count 1
  ; RUN: grep roti.*27     %t1.s | count 1
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll

index 795a7b3dd45c2c35ccdf114e115092db8823db93..b630e9d1461258e5f19b6436eb278f2d9e496817 100755 (executable)
--- a/test/CodeGen/X86/avx-zext.ll
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -3,6 +3,7 @@
  define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
  ;CHECK: zext_8i16_to_8i32
  ;CHECK: vpunpckhwd
+;CHECK: ret
  
    %B = zext <8 x i16> %A to <8 x i32>
    ret <8 x i32>%B
@@ -11,7 +12,19 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
  define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
  ;CHECK: zext_4i32_to_4i64
  ;CHECK: vpunpckhdq
+;CHECK: ret
  
    %B = zext <4 x i32> %A to <4 x i64>
    ret <4 x i64>%B
  }
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
author	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 12 Feb 2012 15:05:31 +0000 (15:05 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/CellSPU/rotate_ops.ll		patch \| blob \| history
test/CodeGen/X86/avx-zext.ll		patch \| blob \| history