[x86] Revert my over-eager commit in r217332.

[oota-llvm.git] / lib / Target / R600 / R600ISelLowering.cpp
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index f58f81e19d234e0cca43a4fde4920625e20a74d4..3bc8cb99c7196a28813b364adb5cbd5ab95341ea 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -19,6 +19,7 @@
  #include "R600Defines.h"
  #include "R600InstrInfo.h"
  #include "R600MachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -82,6 +83,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::SETCC, MVT::i32, Expand);
    setOperationAction(ISD::SETCC, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
@@ -189,7 +192,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineRegisterInfo &MRI = MF->getRegInfo();
    MachineBasicBlock::iterator I = *MI;
    const R600InstrInfo *TII =
-    static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
+      static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
  
    switch (MI->getOpcode()) {
    default:
@@ -578,7 +581,14 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    case ISD::FSIN: return LowerTrig(Op, DAG);
    case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
    case ISD::STORE: return LowerSTORE(Op, DAG);
-  case ISD::LOAD: return LowerLOAD(Op, DAG);
+  case ISD::LOAD: {
+    SDValue Result = LowerLOAD(Op, DAG);
+    assert((!Result.getNode() ||
+            Result.getNode()->getNumValues() == 2) &&
+           "Load should return a value and a chain");
+    return Result;
+  }
+
    case ISD::BRCOND: return LowerBRCOND(Op, DAG);
    case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
    case ISD::INTRINSIC_VOID: {
@@ -635,8 +645,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
        MachineSDNode *interp;
        if (ijb < 0) {
          const MachineFunction &MF = DAG.getMachineFunction();
-        const R600InstrInfo *TII =
-          static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
+        const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
+            MF.getSubtarget().getInstrInfo());
          interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
              MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
          return DAG.getTargetExtractSubreg(
@@ -832,8 +842,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
    default:
      AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
      return;
-  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+  case ISD::FP_TO_UINT:
+    if (N->getValueType(0) == MVT::i1) {
+      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+      return;
+    }
+    // Fall-through. Since we don't care about out of bounds values
+    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
+    // considers some extra cases which are not necessary here.
+  case ISD::FP_TO_SINT: {
+    SDValue Result;
+    if (expandFP_TO_SINT(N, Result, DAG))
+      Results.push_back(Result);
      return;
+  }
    case ISD::UDIV: {
      SDValue Op = SDValue(N, 0);
      SDLoc DL(Op);
@@ -1408,8 +1430,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    // Lowering for indirect addressing
  
    const MachineFunction &MF = DAG.getMachineFunction();
-  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
-                                         getTargetMachine().getFrameLowering());
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+      getTargetMachine().getSubtargetImpl()->getFrameLowering());
    unsigned StackWidth = TFL->getStackWidth(MF);
  
    Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1505,10 +1527,23 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
      return DAG.getMergeValues(Ops, DL);
    }
  
+  // Lower loads constant address space global variable loads
+  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+      isa<GlobalVariable>(
+          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+
+    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
+        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+        DAG.getConstant(2, MVT::i32));
+    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
+                       LoadNode->getChain(), Ptr,
+                       DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+  }
  
    if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
      SDValue MergedValues[2] = {
-      SplitVectorLoad(Op, DAG),
+      ScalarizeVectorLoad(Op, DAG),
        Chain
      };
      return DAG.getMergeValues(MergedValues, DL);
@@ -1578,6 +1613,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
                                    LoadNode->getPointerInfo(), MemVT,
                                    LoadNode->isVolatile(),
                                    LoadNode->isNonTemporal(),
+                                  LoadNode->isInvariant(),
                                    LoadNode->getAlignment());
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
      SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
@@ -1592,8 +1628,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
  
    // Lowering for indirect addressing
    const MachineFunction &MF = DAG.getMachineFunction();
-  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
-                                         getTargetMachine().getFrameLowering());
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+      getTargetMachine().getSubtargetImpl()->getFrameLowering());
    unsigned StackWidth = TFL->getStackWidth(MF);
  
    Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1656,10 +1692,10 @@ SDValue R600TargetLowering::LowerFormalArguments(
                                        SDLoc DL, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
    MachineFunction &MF = DAG.getMachineFunction();
-  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
+  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
  
    SmallVector<ISD::InputArg, 8> LocalIns;
  
@@ -1669,8 +1705,13 @@ SDValue R600TargetLowering::LowerFormalArguments(
  
    for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
      CCValAssign &VA = ArgLocs[i];
-    EVT VT = Ins[i].VT;
-    EVT MemVT = LocalIns[i].VT;
+    const ISD::InputArg &In = Ins[i];
+    EVT VT = In.VT;
+    EVT MemVT = VA.getLocVT();
+    if (!VT.isVector() && MemVT.isVector()) {
+      // Get load source type if scalarized.
+      MemVT = MemVT.getVectorElementType();
+    }
  
      if (ShaderType != ShaderType::COMPUTE) {
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
@@ -1680,7 +1721,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
      }
  
      PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                                   AMDGPUAS::CONSTANT_BUFFER_0);
+                                          AMDGPUAS::CONSTANT_BUFFER_0);
  
      // i64 isn't a legal type, so the register type used ends up as i32, which
      // isn't expected here. It attempts to create this sextload, but it ends up
@@ -1689,15 +1730,28 @@ SDValue R600TargetLowering::LowerFormalArguments(
  
      // The first 36 bytes of the input buffer contains information about
      // thread group and global sizes.
+    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+      // FIXME: This should really check the extload type, but the handling of
+      // extload vector parameters seems to be broken.
+
+      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+      Ext = ISD::SEXTLOAD;
+    }
+
+    // Compute the offset from the value.
+    // XXX - I think PartOffset should give you this, but it seems to give the
+    // size of the register which isn't useful.
+
+    unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+    unsigned PartOffset = VA.getLocMemOffset();
  
-    // FIXME: This should really check the extload type, but the handling of
-    // extload vecto parameters seems to be broken.
-    //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
-    ISD::LoadExtType Ext = ISD::SEXTLOAD;
-    SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
-                                 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
-                                 MachinePointerInfo(UndefValue::get(PtrTy)),
-                                 MemVT, false, false, 4);
+    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+                              DAG.getConstant(36 + PartOffset, MVT::i32),
+                              DAG.getUNDEF(MVT::i32),
+                              PtrInfo,
+                              MemVT, false, true, true, 4);
  
      // 4 is the preferred alignment for the CONSTANT memory space.
      InVals.push_back(Arg);
@@ -2046,7 +2100,7 @@ static bool
  FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
              SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
    const R600InstrInfo *TII =
-      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
    if (!Src.isMachineOpcode())
      return false;
    switch (Src.getMachineOpcode()) {
@@ -2171,16 +2225,15 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
  SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                              SelectionDAG &DAG) const {
    const R600InstrInfo *TII =
-      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+      static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
    if (!Node->isMachineOpcode())
      return Node;
    unsigned Opcode = Node->getMachineOpcode();
    SDValue FakeOp;
  
    std::vector<SDValue> Ops;
-  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
-              I != E; ++I)
-          Ops.push_back(*I);
+  for (const SDUse &I : Node->ops())
+    Ops.push_back(I);
  
    if (Opcode == AMDGPU::DOT_4) {
      int OperandIdx[] = {