// Also handle the case where we explicitly require zeros in the top
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
- N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(1).Val->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
N.getOperand(1).getOperand(0).hasOneUse()) {
- // Check to see if the BUILD_VECTOR is building a zero vector.
- SDOperand BV = N.getOperand(0);
- for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
- if (!isZeroNode(BV.getOperand(i)) &&
- BV.getOperand(i).getOpcode() != ISD::UNDEF)
- return false; // Not a zero/undef vector.
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
// from the LHS.
- unsigned VecWidth = BV.getNumOperands();
+ unsigned VecWidth =
+   MVT::getVectorNumElements(N.getOperand(0).getValueType());
SDOperand ShufMask = N.getOperand(2);
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
return true;
}
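[Note: an illustrative sketch, not part of the patch. It shows roughly what ISD::isBuildVectorAllZeros checks, phrased in terms of the loop deleted above; the helper name looksLikeAllZeros is hypothetical. The practical difference from the deleted loop is that the real helper also looks through a bitcast, so it matches the canonical <4 x i32>/<2 x i32> zero vectors this patch introduces.]

static bool looksLikeAllZeros(SDNode *N) {  // hypothetical sketch
  if (N->getOpcode() == ISD::BIT_CONVERT)   // also accept bitcast of zeros
    N = N->getOperand(0).Val;
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (!isZeroNode(N->getOperand(i)) &&
        N->getOperand(i).getOpcode() != ISD::UNDEF)
      return false;                         // a nonzero, non-undef element
  return true;
}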
-/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
- if (Arg.getOpcode() != ISD::UNDEF) {
- unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
- if (Idx < NumElems) {
- unsigned Opc = V1.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V1.Val->getOperand(Idx)))
- return false;
- } else if (Idx >= NumElems) {
- unsigned Opc = V2.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
- return false;
- }
+ if (Arg.getOpcode() == ISD::UNDEF)
+ continue;
+
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.Val->getOperand(Idx)))
+ return false;
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+ return false;
}
}
return true;
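[Note: an illustrative trace, with operand values assumed. Every element the mask actually reads must be provably zero for the loop above to fall through to the final return true:]

//   NumElems = 4, Mask = <4,1,2,3>
//   V1 = BUILD_VECTOR 0,0,0,0        V2 = BUILD_VECTOR 0,a,b,c
//   i=0: Idx=4 >= NumElems -> V2 is a BUILD_VECTOR and
//        V2->getOperand(4-4) is zero, so continue
//   i=1..3: Idx < NumElems and isBuildVectorAllZeros(V1), so continue
//   -> every lane of the shuffle result is zero; return true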
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
- unsigned NumElems = MVT::getVectorNumElements(VT);
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
- bool isFP = MVT::isFloatingPoint(EVT);
- SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
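[Note: a hedged usage sketch, not from the patch; Z0/Z1 are invented names and a SelectionDAG named DAG is assumed in scope. It shows why funneling every requested type through one canonical BUILD_VECTOR pays off: getNode CSEs structurally identical nodes, so all zero vectors share one node.]

SDOperand Z0 = getZeroVector(MVT::v4f32, DAG); // BIT_CONVERT of a v4i32 zero
SDOperand Z1 = getZeroVector(MVT::v2i64, DAG); // BIT_CONVERT of the same node
// Z0.getOperand(0).Val == Z1.getOperand(0).Val: one zero node, materialized
// once (e.g. a single pxor/xorps) instead of one zero vector per value type.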
+
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- Mask = getZeroVector(MaskVT, DAG);
+ Mask = getZeroVector(MVT::v4i32, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector.
+/// vector against a zero or undef vector. This produces a shuffle where the
+/// low element of V2 is swizzled into the zero/undef vector, landing at
+/// element Idx. This yields a shuffle mask like 4,1,2,3 (idx=0) or
+/// 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
- SDOperand Zero = DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
- MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SmallVector<SDOperand, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
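[Note: a worked example with assumed arguments, to make the mask construction above concrete:]

// getShuffleVectorZeroOrUndef(V2, MVT::v4f32, 4, /*Idx=*/2, /*isZero=*/true,
//                             DAG) builds:
//   V1   = getZeroVector(v4f32, DAG)   -> <0,0,0,0>
//   Mask = <0,1,4,3>                   -> slot 2 reads the low elt of V2
//   result: vector_shuffle V1, V2, <0,1,4,3> = <0, 0, V2[0], 0>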
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
- // All zero's are handled with pxor.
- if (ISD::isBuildVectorAllZeros(Op.Val))
- return Op;
+ // All zeros are handled with pxor; all ones are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 targets.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
- // All one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllOnes(Op.Val))
- return Op;
+ if (ISD::isBuildVectorAllOnes(Op.Val))
+ return getOnesVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), DAG);
+ }
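[Note: a hedged before/after sketch of this canonicalization on a non-canonical type; node shapes abbreviated:]

// Before: (v2i64 BUILD_VECTOR -1, -1)      ; needs i64 scalar constants
// After:  (v2i64 BIT_CONVERT (v4i32 BUILD_VECTOR ~0,~0,~0,~0))
// The inner v4i32 node is what V_SETALLONES (pcmpeqd) now patterns on, so
// no 64-bit scalar immediates have to be legalized on x86-32.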
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
}
if (NumNonZero == 0) {
- if (NumZero == 0)
- // All undef vector. Return an UNDEF.
- return DAG.getNode(ISD::UNDEF, VT);
- else
- // A mix of zero and undef. Return a zero vector.
- return getZeroVector(VT, DAG);
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful: a
+ // v4i32 <1,1,1,1> splat bitcast to v8i16 is <1,0,1,0,...>, which is not
+ // a v8i16 splat.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
//===----------------------------------------------------------------------===//
// Alias instructions that map zero vector to pxor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in {
def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
"pxor\t$dst, $dst",
- [(set VR64:$dst, (v1i64 immAllZerosV))]>;
+ [(set VR64:$dst, (v2i32 immAllZerosV))]>;
def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins),
"pcmpeqd\t$dst, $dst",
- [(set VR64:$dst, (v1i64 immAllOnesV))]>;
+ [(set VR64:$dst, (v2i32 immAllOnesV))]>;
}
//===----------------------------------------------------------------------===//
def : Pat<(store (v1i64 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-// 64-bit vector all zero's.
-def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
-
-// 64-bit vector all one's.
-def : Pat<(v8i8 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v4i16 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v2i32 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v1i64 immAllOnesV), (MMX_V_SETALLONES)>;
-
// Bit convert.
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
- def : Pat<(v8i8 (vector_shuffle immAllZerosV,
+ def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
(v8i8 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
(MMX_MOVZDI2PDIrr GR32:$src)>;
- def : Pat<(v4i16 (vector_shuffle immAllZerosV,
+ def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
(v4i16 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
(MMX_MOVZDI2PDIrr GR32:$src)>;
def : Pat<(v2i32 (vector_shuffle immAllZerosV,
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
VR64:$src2)),
(MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
VR64:$src2)),
(MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
VR64:$src2)),
(MMX_PANDNrr VR64:$src1, VR64:$src2)>;
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
"xorps\t$dst, $dst",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
// FR32 to 128-bit vector conversion.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+ [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV_bc,
(v4f32 (scalar_to_vector (loadf32 addr:$src))),
MOVL_shuffle_mask)))]>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
"pcmpeqd\t$dst, $dst",
- [(set VR128:$dst, (v2f64 immAllOnesV))]>;
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
// FR64 to 128-bit vector conversion.
def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (vector_shuffle immAllZerosV,
+ (v2f64 (vector_shuffle immAllZerosV_bc,
(v2f64 (scalar_to_vector
(loadf64 addr:$src))),
MOVL_shuffle_mask)))]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-// 128-bit vector all zero's.
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-
-// 128-bit vector all one's.
-def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
-
-
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
// 16-bits matter.
def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
-def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+def : Pat<(v8i16 (vector_shuffle immAllZerosV_bc,
(v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
(MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+def : Pat<(v16i8 (vector_shuffle immAllZerosV_bc,
(v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
(MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
(v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
(MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
(v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
(MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
}
// Set lowest element and zero upper elements.
let AddedComplexity = 20 in
-def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
+def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV_bc,
(v2f64 (scalar_to_vector (loadf64 addr:$src))),
MOVL_shuffle_mask)),
(MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
--- /dev/null
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep pxor | count 1
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep xorps | count 1
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep pcmpeqd | count 2
+
+@M1 = external global <1 x i64>
+@M2 = external global <2 x i32>
+
+@S1 = external global <2 x i64>
+@S2 = external global <4 x i32>
+
+define void @test() {
+ store <1 x i64> zeroinitializer, <1 x i64>* @M1
+ store <2 x i32> zeroinitializer, <2 x i32>* @M2
+ ret void
+}
+
+define void @test2() {
+ store <1 x i64> < i64 -1 >, <1 x i64>* @M1
+ store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
+ ret void
+}
+
+define void @test3() {
+ store <2 x i64> zeroinitializer, <2 x i64>* @S1
+ store <4 x i32> zeroinitializer, <4 x i32>* @S2
+ ret void
+}
+
+define void @test4() {
+ store <2 x i64> < i64 -1, i64 -1>, <2 x i64>* @S1
+ store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* @S2
+ ret void
+}