Revert r225048: It broke ObjC on AArch64.

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 77a1d6e69febe440ec063328ebaba000dd8f67e9..e8bac7b9312d4c0c47c4b87f84d3fb6b909a02d2 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3874,7 +3874,7 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
    return true;
  }
  
-bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, 
+bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
                                                  unsigned Index) const {
    if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
      return false;
@@ -3882,6 +3882,16 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
    return (Index == 0 || Index == ResVT.getVectorNumElements());
  }
  
+bool X86TargetLowering::isCheapToSpeculateCttz() const {
+  // Speculate cttz only when TZCNT is directly usable (subtarget has BMI).
+  return Subtarget->hasBMI();
+}
+
+bool X86TargetLowering::isCheapToSpeculateCtlz() const {
+  // Speculate ctlz only when LZCNT is directly usable (subtarget has LZCNT).
+  return Subtarget->hasLZCNT();
+}
+
  /// isUndefOrInRange - Return true if Val is undef or if its value falls within
  /// the specified range (L, H].
  static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -3896,7 +3906,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
  
  /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
  /// from position Pos and ending in Pos+Size, falls within the specified
-/// sequential range (L, L+Pos]. or is undef.
+/// sequential range [Low, Low+Size), or is undef.
  static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
                                         unsigned Pos, unsigned Size, int Low) {
    for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
@@ -6054,7 +6064,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
  
      return NewLd;
    }
-  
+
    //TODO: The code below fires only for for loading the low v2i32 / v2f32
    //of a v4i32 / v4f32. It's probably worth generalizing.
    if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
@@ -7041,7 +7051,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
      // Check for a build vector of consecutive loads.
      if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
        return LD;
-    
+
      EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
  
      // Build both the lower and upper subvector.
@@ -7711,17 +7721,6 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
    int Size = Mask.size();
    int Scale = 16 / Size;
  
-  auto isSequential = [](int Base, int StartIndex, int EndIndex, int MaskOffset,
-                         ArrayRef<int> Mask) {
-    for (int i = StartIndex; i < EndIndex; i++) {
-      if (Mask[i] < 0)
-        continue;
-      if (i + Base != Mask[i] - MaskOffset)
-        return false;
-    }
-    return true;
-  };
-
    for (int Shift = 1; Shift < Size; Shift++) {
      int ByteShift = Shift * Scale;
  
@@ -7735,8 +7734,10 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
      }
  
      if (ZeroableRight) {
-      bool ValidShiftRight1 = isSequential(Shift, 0, Size - Shift, 0, Mask);
-      bool ValidShiftRight2 = isSequential(Shift, 0, Size - Shift, Size, Mask);
+      bool ValidShiftRight1 =
+          isSequentialOrUndefInRange(Mask, 0, Size - Shift, Shift);
+      bool ValidShiftRight2 =
+          isSequentialOrUndefInRange(Mask, 0, Size - Shift, Size + Shift);
  
        if (ValidShiftRight1 || ValidShiftRight2) {
          // Cast the inputs to v2i64 to match PSRLDQ.
@@ -7758,8 +7759,10 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
      }
  
      if (ZeroableLeft) {
-      bool ValidShiftLeft1 = isSequential(-Shift, Shift, Size, 0, Mask);
-      bool ValidShiftLeft2 = isSequential(-Shift, Shift, Size, Size, Mask);
+      bool ValidShiftLeft1 =
+          isSequentialOrUndefInRange(Mask, Shift, Size - Shift, 0);
+      bool ValidShiftLeft2 =
+          isSequentialOrUndefInRange(Mask, Shift, Size - Shift, Size);
  
        if (ValidShiftLeft1 || ValidShiftLeft2) {
          // Cast the inputs to v2i64 to match PSLLDQ.
@@ -16833,7 +16836,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
  /// The mask is comming as MVT::i8 and it should be truncated
  /// to MVT::i1 while lowering masking intrinsics.
  /// The main difference between ScalarMaskingNode and VectorMaskingNode is using
-/// "X86select" instead of "vselect". We just can't create the "vselect" node for 
+/// "X86select" instead of "vselect". We just can't create the "vselect" node for
  /// a scalar instruction.
  static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                      SDValue PreservedSrc,
@@ -22767,7 +22770,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDValue Vals[4];
    SDLoc dl(InputVector);
-  
+
    if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
      SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
      EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
@@ -22776,7 +22779,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
      SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
        DAG.getConstant(1, VecIdxTy));
  
-    SDValue ShAmt = DAG.getConstant(32, 
+    SDValue ShAmt = DAG.getConstant(32,
        DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
      Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
      Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,