[PowerPC] Try harder to find a base+offset when looking for consecutive accesses

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index c56f72525e7007c179f1a1daf4af9d03e0c4dec9..804aec92164e6a89224f0ac044e05cdfd1d735b7 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
        AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
        setOperationAction(ISD::SELECT, VT, Promote);
        AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+      setOperationAction(ISD::SELECT_CC, VT, Promote);
+      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
        setOperationAction(ISD::STORE, VT, Promote);
        AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
  
@@ -542,6 +544,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
  
      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+      if (Subtarget.hasP8Vector())
+        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+      if (Subtarget.hasDirectMove()) {
+        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+        // FIXME: this is causing bootstrap failures, disable temporarily
+        //setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+      }
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
  
        setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
@@ -1430,6 +1441,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
    assert(N->getValueType(0) == MVT::v16i8 &&
           (EltSize == 1 || EltSize == 2 || EltSize == 4));
  
+  // The consecutive indices need to specify an element, not part of two
+  // different elements.  So abandon ship early if this isn't the case.
+  if (N->getMaskElt(0) % EltSize != 0)
+    return false;
+
    // This is a splat operation if each element of the permute is the same, and
    // if the value doesn't reference the second vector.
    unsigned ElementBase = N->getMaskElt(0);
@@ -1991,10 +2007,10 @@ static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
                  DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  
    SDValue Ops[] = { GA, Reg };
-  return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
-                                 DAG.getVTList(VT, MVT::Other), Ops, VT,
-                                 MachinePointerInfo::getGOT(), 0, false, true,
-                                 false, 0);
+  return DAG.getMemIntrinsicNode(
+      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
+      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
+      false, 0);
  }
  
  SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
@@ -2085,6 +2101,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
    // large models could be added if users need it, at the cost of
    // additional complexity.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  if (DAG.getTarget().Options.EmulatedTLS)
+    return LowerToTLSEmulatedModel(GA, DAG);
+
    SDLoc dl(GA);
    const GlobalValue *GV = GA->getGlobal();
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3888,9 +3907,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
      SDValue FIN = TailCallArgs[i].FrameIdxOp;
      int FI = TailCallArgs[i].FrameIdx;
      // Store relative to framepointer.
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
-                                       MachinePointerInfo::getFixedStack(FI),
-                                       false, false, 0));
+    MemOpChains.push_back(DAG.getStore(
+        Chain, dl, Arg, FIN,
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+        false, 0));
    }
  }
  
@@ -3915,9 +3935,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                                            NewRetAddrLoc, true);
      EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
      SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
-    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
-                         MachinePointerInfo::getFixedStack(NewRetAddr),
-                         false, false, 0);
+    Chain = DAG.getStore(
+        Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr),
+        false, false, 0);
  
      // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
      // slot as the FP is never overwritten.
@@ -3926,9 +3947,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
        int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                            true);
        SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
-      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
-                           MachinePointerInfo::getFixedStack(NewFPIdx),
-                           false, false, 0);
+      Chain = DAG.getStore(
+          Chain, dl, OldFP, NewFramePtrIdx,
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
+          false, false, 0);
      }
    }
    return Chain;
@@ -5311,9 +5333,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
-                         MachinePointerInfo::getStack(TOCSaveOffset),
-                         false, false, 0);
+    Chain = DAG.getStore(
+        Val.getValue(1), dl, Val, AddPtr,
+        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
+        false, false, 0);
      // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
      // This does not mean the MTCTR instruction must use R12; it's easier
      // to model this as an extra parameter, so do that.
@@ -6094,7 +6117,8 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
      (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
    SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
    int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
-  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
  
    // Emit a store to the stack slot.
    SDValue Chain;
@@ -6414,17 +6438,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
        int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
        SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
  
-      SDValue Store =
-        DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
-                     MachinePointerInfo::getFixedStack(FrameIdx),
-                     false, false, 0);
+      SDValue Store = DAG.getStore(
+          DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+          false, false, 0);
  
        assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
               "Expected an i32 store");
  
        RLI.Ptr = FIdx;
        RLI.Chain = Store;
-      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+      RLI.MPI =
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
        RLI.Alignment = 4;
  
        MachineMemOperand *MMO =
@@ -6465,16 +6490,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
        int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
        SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
  
-      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
-                                   MachinePointerInfo::getFixedStack(FrameIdx),
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(
+          DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+          false, false, 0);
  
        assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
               "Expected an i32 store");
  
        RLI.Ptr = FIdx;
        RLI.Chain = Store;
-      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+      RLI.MPI =
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
        RLI.Alignment = 4;
      }
  
@@ -6499,14 +6526,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                  Op.getOperand(0));
  
      // STD the extended value into the stack slot.
-    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
-                                 MachinePointerInfo::getFixedStack(FrameIdx),
-                                 false, false, 0);
+    SDValue Store = DAG.getStore(
+        DAG.getEntryNode(), dl, Ext64, FIdx,
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+        false, false, 0);
  
      // Load the value as a double.
-    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
-                     MachinePointerInfo::getFixedStack(FrameIdx),
-                     false, false, false, 0);
+    Ld = DAG.getLoad(
+        MVT::f64, dl, Store, FIdx,
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+        false, false, false, 0);
    }
  
    // FCFID it and return it.
@@ -6761,7 +6790,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
      // to a zero vector to get the boolean result.
      MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
      int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
-    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+    MachinePointerInfo PtrInfo =
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
  
@@ -6807,9 +6837,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
        ValueVTs.push_back(MVT::Other); // chain
        SDVTList VTs = DAG.getVTList(ValueVTs);
  
-      return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
-        dl, VTs, Ops, MVT::v4f32,
-        MachinePointerInfo::getConstantPool());
+      return DAG.getMemIntrinsicNode(
+          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
      }
  
      SmallVector<SDValue, 4> Stores;
@@ -7170,7 +7200,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
          PPC::isSplatShuffleMask(SVOp, 4) ||
          PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
          PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
-        PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
          PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
          PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
          PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
@@ -7178,8 +7207,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
          PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
          PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
          PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
-        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG)   ||
-        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
+        (Subtarget.hasP8Altivec() && (
+         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
        return Op;
      }
    }
@@ -7190,7 +7221,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
    unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
    if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
-      PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
@@ -7198,8 +7228,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
        PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
-      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG)             ||
-      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
+      (Subtarget.hasP8Altivec() && (
+       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
      return Op;
  
    // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
@@ -7536,7 +7568,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
  
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
-  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
  
@@ -7752,7 +7785,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
  
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
-  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+  MachinePointerInfo PtrInfo =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
  
@@ -9128,7 +9162,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
    return SDValue();
  }
  
-bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
    // Note: This functionality is used only when unsafe-fp-math is enabled, and
    // on cores with reciprocal estimates (which are used when unsafe-fp-math is
    // enabled for division), this functionality is redundant with the default
@@ -9141,12 +9175,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
    // one FP pipeline) for three or more FDIVs (for generic OOO cores).
    switch (Subtarget.getDarwinDirective()) {
    default:
-    return NumUsers > 2;
+    return 3;
    case PPC::DIR_440:
    case PPC::DIR_A2:
    case PPC::DIR_E500mc:
    case PPC::DIR_E5500:
-    return NumUsers > 1;
+    return 2;
+  }
+}
+
+// Peel nested base+constant nodes off Loc for isConsecutiveLSLoc, which may
+// run before adds are collapsed; Base/Offset are untouched if Loc isn't one.
+static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
+                                     int64_t& Offset, SelectionDAG &DAG) {
+  if (DAG.isBaseWithConstantOffset(Loc)) {
+    Base = Loc.getOperand(0);
+    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+
+    // Operand 0 may itself be base+constant: recurse so its constant is added
+    // to Offset too and Base ends up as the innermost non-matching operand.
+    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
    }
  }
  
@@ -9169,16 +9217,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
      return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
    }
  
-  // Handle X+C
-  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
-      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
-    return true;
-
+  SDValue Base1 = Loc, Base2 = BaseLoc;
+  int64_t Offset1 = 0, Offset2 = 0;
+  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
+  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
+    if (Base1 == Base2 && Offset1 == (Offset2 + Dist*Bytes))
+      return true;
+  
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    const GlobalValue *GV1 = nullptr;
    const GlobalValue *GV2 = nullptr;
-  int64_t Offset1 = 0;
-  int64_t Offset2 = 0;
+  Offset1 = 0;
+  Offset2 = 0;
    bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
    bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
@@ -9961,6 +10011,9 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
      if (Src.getValueType() == MVT::f32) {
        Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
        DCI.AddToWorklist(Src.getNode());
+    } else if (Src.getValueType() != MVT::f64) {
+      // Make sure that we don't pick up a ppc_fp128 source value.
+      return SDValue();
      }
  
      unsigned FCTOp =
@@ -10265,7 +10318,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // original unaligned load.
        MachineFunction &MF = DAG.getMachineFunction();
        MachineMemOperand *BaseMMO =
-        MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+        MF.getMachineMemOperand(LD->getMemOperand(),
+                                -(long)MemVT.getStoreSize()+1,
                                  2*MemVT.getStoreSize()-1);
  
        // Create the new base load.
@@ -11469,7 +11523,7 @@ bool
  PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                       EVT VT , unsigned DefinedValues) const {
    if (VT == MVT::v2i64)
-    return false;
+    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
  
    if (Subtarget.hasQPX()) {
      if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)