Add accessors for the PPC 403 bank registers.

[oota-llvm.git] / lib / Target / R600 / SIISelLowering.cpp
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 577d1af3c23986ccfe85519c06155a1083a8066d..2e982e2c0a3ae6679b9f7aea5fd2db8af9f8cd82 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -240,15 +240,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
  // TargetLowering queries
  //===----------------------------------------------------------------------===//
  
-bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
-                                                     unsigned AddrSpace,
-                                                     bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT  VT,
+                                                      unsigned AddrSpace,
+                                                      unsigned Align,
+                                                      bool *IsFast) const {
    if (IsFast)
      *IsFast = false;
  
-  // XXX: This depends on the address space and also we may want to revist
-  // the alignment values we specify in the DataLayout.
-
    // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
    // which isn't a simple VT.
    if (!VT.isSimple() || VT == MVT::Other)
@@ -261,8 +259,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
    // XXX - The only mention I see of this in the ISA manual is for LDS direct
    // reads the "byte address and must be dword aligned". Is it also true for the
    // normal loads and stores?
-  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
-    return false;
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+    // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
+    // aligned, 8 byte access in a single operation using ds_read2/write2_b32
+    // with adjacent offsets.
+    return Align % 4 == 0;
+  }
  
    // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
    // byte-address are ignored, thus forcing Dword alignment.
@@ -272,6 +274,26 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
    return VT.bitsGT(MVT::i32);
  }
  
+EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+                                          unsigned SrcAlign, bool IsMemset,
+                                          bool ZeroMemset,
+                                          bool MemcpyStrSrc,
+                                          MachineFunction &MF) const {
+  // FIXME: Should account for address space here.
+
+  // The default fallback uses the private pointer size as a guess for a type to
+  // use. Make sure we switch these to 64-bit accesses.
+
+  if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
+    return MVT::v4i32;
+
+  if (Size >= 8 && DstAlign >= 4)
+    return MVT::v2i32;
+
+  // Use the default.
+  return MVT::Other;
+}
+
  TargetLoweringBase::LegalizeTypeAction
  SITargetLowering::getPreferredVectorAction(EVT VT) const {
    if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -282,25 +304,33 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
  
  bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                           Type *Ty) const {
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    return TII->isInlineConstant(Imm);
  }
  
  SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
-                                         SDLoc DL, SDValue Chain,
+                                         SDLoc SL, SDValue Chain,
                                           unsigned Offset, bool Signed) const {
+  const DataLayout *DL = getDataLayout();
+
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
    MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-  PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                            AMDGPUAS::CONSTANT_ADDRESS);
-  SDValue BasePtr =  DAG.getCopyFromReg(Chain, DL,
+  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+  SDValue BasePtr =  DAG.getCopyFromReg(Chain, SL,
                             MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
-  SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+  SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
                                               DAG.getConstant(Offset, MVT::i64));
-  return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
-                            MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
-                            false, false, MemVT.getSizeInBits() >> 3);
-
+  SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+  return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD,
+                     VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
+                     false, // isVolatile
+                     true, // isNonTemporal
+                     true, // isInvariant
+                     DL->getABITypeAlignment(Ty)); // Alignment
  }
  
  SDValue SITargetLowering::LowerFormalArguments(
@@ -311,7 +341,8 @@ SDValue SITargetLowering::LowerFormalArguments(
                                        SDLoc DL, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
  
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterInfo *TRI =
+      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
  
    MachineFunction &MF = DAG.getMachineFunction();
    FunctionType *FType = MF.getFunction()->getFunctionType();
@@ -466,8 +497,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
      MachineInstr * MI, MachineBasicBlock * BB) const {
  
    MachineBasicBlock::iterator I = *MI;
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  
    switch (MI->getOpcode()) {
@@ -536,8 +567,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
        .addReg(MI->getOperand(1).getReg())
        .addImm(1)  // SRC1 modifiers
        .addReg(MI->getOperand(2).getReg())
-      .addImm(0)  // SRC2 modifiers
-      .addImm(0)  // src2
        .addImm(0)  // CLAMP
        .addImm(0); // OMOD
      MI->eraseFromParent();
@@ -557,45 +586,48 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    }
    case AMDGPU::FABS_SI: {
      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+        getTargetMachine().getSubtargetImpl()->getInstrInfo());
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned DestReg = MI->getOperand(0).getReg();
      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
-            Reg)
-            .addImm(0x7fffffff);
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
-            MI->getOperand(0).getReg())
-            .addReg(MI->getOperand(1).getReg())
-            .addReg(Reg);
+
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
+      .addImm(0x7fffffff);
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), DestReg)
+      .addReg(MI->getOperand(1).getReg())
+      .addReg(Reg);
      MI->eraseFromParent();
      break;
    }
    case AMDGPU::FNEG_SI: {
      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+        getTargetMachine().getSubtargetImpl()->getInstrInfo());
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned DestReg = MI->getOperand(0).getReg();
      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
-            Reg)
-            .addImm(0x80000000);
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
-            MI->getOperand(0).getReg())
-            .addReg(MI->getOperand(1).getReg())
-            .addReg(Reg);
+
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
+      .addImm(0x80000000);
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), DestReg)
+      .addReg(MI->getOperand(1).getReg())
+      .addReg(Reg);
      MI->eraseFromParent();
      break;
    }
    case AMDGPU::FCLAMP_SI: {
-    const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
-            MI->getOperand(0).getReg())
-            .addImm(0) // SRC0 modifiers
-            .addOperand(MI->getOperand(1))
-            .addImm(0) // SRC1 modifiers
-            .addImm(0) // SRC1
-            .addImm(1) // CLAMP
-            .addImm(0); // OMOD
+    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+        getTargetMachine().getSubtargetImpl()->getInstrInfo());
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned DestReg = MI->getOperand(0).getReg();
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_ADD_F32_e64), DestReg)
+      .addImm(0) // SRC0 modifiers
+      .addOperand(MI->getOperand(1))
+      .addImm(0) // SRC1 modifiers
+      .addImm(0) // SRC1
+      .addImm(1) // CLAMP
+      .addImm(0); // OMOD
      MI->eraseFromParent();
    }
    }
@@ -684,8 +716,8 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
  SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
  
    MachineFunction &MF = DAG.getMachineFunction();
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
    unsigned FrameIndex = FINode->getIndex();
@@ -746,7 +778,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
        BR->getOperand(0),
        BRCOND.getOperand(2)
      };
-    DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
+    SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
+    DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
+    BR = NewBR.getNode();
    }
  
    SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -1324,8 +1358,8 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
                                 bool &ScalarSlotUsed) const {
  
    MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
      return false;
  
@@ -1359,8 +1393,8 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
  
  const TargetRegisterClass *SITargetLowering::getRegClassForNode(
                                     SelectionDAG &DAG, const SDValue &Op) const {
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    const SIRegisterInfo &TRI = TII->getRegisterInfo();
  
    if (!Op->isMachineOpcode()) {
@@ -1412,7 +1446,8 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
  /// \brief Does "Op" fit into register class "RegClass" ?
  bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                                      unsigned RegClass) const {
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterInfo *TRI =
+      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
    const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
    if (!RC) {
      return false;
@@ -1478,8 +1513,8 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
  
    // Original encoding (either e32 or e64)
    int Opcode = Node->getMachineOpcode();
-  const SIInstrInfo *TII =
-    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    const MCInstrDesc *Desc = &TII->get(Opcode);
  
    unsigned NumDefs = Desc->getNumDefs();
@@ -1606,39 +1641,23 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
        continue;
      if (!Operand.isMachineOpcode())
        continue;
-    if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
-      Ops.pop_back();
-      Ops.push_back(Operand.getOperand(0));
-      InputModifiers[i] = 1;
-      Promote2e64 = true;
-      if (!DescE64)
-        continue;
-      Desc = DescE64;
-      DescE64 = nullptr;
-    }
-    else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
-      Ops.pop_back();
-      Ops.push_back(Operand.getOperand(0));
-      InputModifiers[i] = 2;
-      Promote2e64 = true;
-      if (!DescE64)
-        continue;
-      Desc = DescE64;
-      DescE64 = nullptr;
-    }
    }
  
    if (Promote2e64) {
      std::vector<SDValue> OldOps(Ops);
      Ops.clear();
+    bool HasModifiers = TII->hasModifiers(Desc->Opcode);
      for (unsigned i = 0; i < OldOps.size(); ++i) {
        // src_modifier
-      Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
+      if (HasModifiers)
+        Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
        Ops.push_back(OldOps[i]);
      }
      // Add the modifier flags while promoting
-    for (unsigned i = 0; i < 2; ++i)
-      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+    if (HasModifiers) {
+      for (unsigned i = 0; i < 2; ++i)
+        Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+    }
    }
  
    // Add optional chain and glue
@@ -1750,8 +1769,8 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
  /// \brief Fold the instructions after selecting them.
  SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
-  const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    Node = AdjustRegClass(Node, DAG);
  
    if (TII->isMIMG(Node->getMachineOpcode()))
@@ -1764,8 +1783,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
  /// bits set in the writemask
  void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                       SDNode *Node) const {
-  const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
    if (!TII->isMIMG(MI->getOpcode()))
      return;