[SDAG] Handle LowerOperation returning its input consistently

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAG.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index f75d5f4b2bd200a8ef1466af8193fc86c5a21a3e..9466f4dd060a2dcb62c3eb64ed04d990084d3514 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1484,6 +1484,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
    if (N1.getOpcode() == ISD::UNDEF)
      commuteShuffle(N1, N2, MaskVec);
  
+  // If shuffling a splat, try to blend the splat instead. We do this here so
+  // that even when this arises during lowering we don't have to re-handle it.
+  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+    BitVector UndefElements;
+    SDValue Splat = BV->getSplatValue(&UndefElements);
+    if (!Splat)
+      return;
+
+    for (int i = 0; i < (int)NElts; ++i) {
+      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+        continue;
+
+      // If this input comes from undef, mark it as such.
+      if (UndefElements[MaskVec[i] - Offset]) {
+        MaskVec[i] = -1;
+        continue;
+      }
+
+      // If we can blend a non-undef lane, use that instead.
+      if (!UndefElements[i])
+        MaskVec[i] = i + Offset;
+    }
+  };
+  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+    BlendSplat(N1BV, 0);
+  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+    BlendSplat(N2BV, NElts);
+
    // Canonicalize all index into lhs, -> shuffle lhs, undef
    // Canonicalize all index into rhs, -> shuffle rhs, undef
    bool AllLHS = true, AllRHS = true;
@@ -1513,9 +1541,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
      return getUNDEF(VT);
  
    // If Identity shuffle return that node.
-  bool Identity = true;
+  bool Identity = true, AllSame = true;
    for (unsigned i = 0; i != NElts; ++i) {
      if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+    if (MaskVec[i] != MaskVec[0]) AllSame = false;
    }
    if (Identity && NElts)
      return N1;
@@ -1537,18 +1566,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
        if (Splat && Splat.getOpcode() == ISD::UNDEF)
          return getUNDEF(VT);
  
+      bool SameNumElts =
+          V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
        // We only have a splat which can skip shuffles if there is a splatted
        // value and no undef lanes rearranged by the shuffle.
        if (Splat && UndefElements.none()) {
          // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
          // number of elements match or the value splatted is a zero constant.
-        if (V.getValueType().getVectorNumElements() ==
-            VT.getVectorNumElements())
+        if (SameNumElts)
            return N1;
          if (auto *C = dyn_cast<ConstantSDNode>(Splat))
            if (C->isNullValue())
              return N1;
        }
+
+      // If the shuffle itself creates a splat, build the vector directly.
+      if (AllSame && SameNumElts) {
+        const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+        SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+        EVT BuildVT = BV->getValueType(0);
+        SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+        // We may have jumped through bitcasts, so the type of the
+        // BUILD_VECTOR may not match the type of the shuffle.
+        if (BuildVT != VT)
+          NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+        return NewBV;
+      }
      }
    }
  
@@ -2323,6 +2369,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
      KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
      break;
    }
+  case ISD::EXTRACT_ELEMENT: {
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    const unsigned Index =
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+    // Remove low part of known bits mask
+    KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+    KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+    // Remove high part of known bits mask
+    KnownZero = KnownZero.trunc(BitWidth);
+    KnownOne = KnownOne.trunc(BitWidth);
+    break;
+  }
    case ISD::FrameIndex:
    case ISD::TargetFrameIndex:
      if (unsigned Align = InferPtrAlignment(Op)) {
@@ -2522,6 +2583,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
      // FIXME: it's tricky to do anything useful for this, but it is an important
      // case for targets like X86.
      break;
+  case ISD::EXTRACT_ELEMENT: {
+    const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    const int BitWidth = Op.getValueType().getSizeInBits();
+    const int Items =
+      Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+    // Get the reverse index (starting from 0). The Op1 value indexes elements
+    // from the little end, while the sign bits start at the big end.
+    const int rIndex = Items - 1 -
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+    // If the sign portion ends in our element, the subtraction gives the
+    // correct result. Otherwise it gives a negative or > bitwidth result.
+    return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+  }
    }
  
    // If we are looking at the loaded value of the SDNode.
@@ -3569,7 +3645,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
        const APFloat &V3 = N3CFP->getValueAPF();
        APFloat::opStatus s =
          V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
-      if (s != APFloat::opInvalidOp)
+      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
          return getConstantFP(V1, VT);
      }
      break;
@@ -3918,9 +3994,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
    bool DstAlignCanChange = false;
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize =
-    MF.getFunction()->getAttributes().
-      hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
      DstAlignCanChange = true;
@@ -4033,8 +4107,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
    bool DstAlignCanChange = false;
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->getAttributes().
-    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
      DstAlignCanChange = true;
@@ -4128,8 +4201,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
    bool DstAlignCanChange = false;
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = MF.getFunction()->getAttributes().
-    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
      DstAlignCanChange = true;
@@ -4219,11 +4291,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
  
    // Then check to see if we should lower the memcpy with target-specific
    // code. If the target chooses to do this, this is the next best.
-  SDValue Result =
-      TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
-                                   isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
-  if (Result.getNode())
-    return Result;
+  if (TSI) {
+    SDValue Result = TSI->EmitTargetCodeForMemcpy(
+        *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+        DstPtrInfo, SrcPtrInfo);
+    if (Result.getNode())
+      return Result;
+  }
  
    // If we really need inline code and the target declined to provide it,
    // use a (potentially long) sequence of loads and stores.
@@ -4285,10 +4359,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
  
    // Then check to see if we should lower the memmove with target-specific
    // code. If the target chooses to do this, this is the next best.
-  SDValue Result = TSI->EmitTargetCodeForMemmove(
-      *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
-  if (Result.getNode())
-    return Result;
+  if (TSI) {
+    SDValue Result = TSI->EmitTargetCodeForMemmove(
+        *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+    if (Result.getNode())
+      return Result;
+  }
  
    // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
    // not be safe.  See memcpy above for more details.
@@ -4337,10 +4413,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
  
    // Then check to see if we should lower the memset with target-specific
    // code. If the target chooses to do this, this is the next best.
-  SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
-                                                Size, Align, isVol, DstPtrInfo);
-  if (Result.getNode())
-    return Result;
+  if (TSI) {
+    SDValue Result = TSI->EmitTargetCodeForMemset(
+        *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+    if (Result.getNode())
+      return Result;
+  }
  
    // Emit a library call.
    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
@@ -4685,10 +4763,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
      assert(VT.isInteger() == MemVT.isInteger() &&
             "Cannot convert from FP to Int or Int -> FP!");
      assert(VT.isVector() == MemVT.isVector() &&
-           "Cannot use trunc store to convert to or from a vector!");
+           "Cannot use an ext load to convert to or from a vector!");
      assert((!VT.isVector() ||
              VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
-           "Cannot use trunc store to change the number of vector elements!");
+           "Cannot use an ext load to change the number of vector elements!");
    }
  
    bool Indexed = AM != ISD::UNINDEXED;