Don't use a potentially expensive shift if all we want is one set bit.

[oota-llvm.git] / lib / Target / R600 / AMDGPUISelLowering.cpp
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index 0a33264686d20b2342c0155ee839f58834e3e45e..7fad3bbc6c8c3dc312472507b50332710fbd21fa 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -14,14 +14,22 @@
  //===----------------------------------------------------------------------===//
  
  #include "AMDGPUISelLowering.h"
+#include "AMDGPU.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
  #include "AMDILIntrinsicInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DataLayout.h"
  
  using namespace llvm;
  
+#include "AMDGPUGenCallingConv.inc"
+
  AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  
@@ -41,6 +49,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  
+  // The hardware supports ROTR, but not ROTL
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+
    // Lower floating point store/load to integer store/load to reduce the number
    // of patterns in tablegen.
    setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -55,26 +66,43 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
    AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
  
+  setOperationAction(ISD::MUL, MVT::i64, Expand);
+
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+  int types[] = {
+    (int)MVT::v2i32,
+    (int)MVT::v4i32
+  };
+  size_t NumTypes = sizeof(types) / sizeof(*types);
+
+  for (unsigned int x  = 0; x < NumTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+    // Expand the following operations for the current type by default.
+    setOperationAction(ISD::ADD,  VT, Expand);
+    setOperationAction(ISD::AND,  VT, Expand);
+    setOperationAction(ISD::MUL,  VT, Expand);
+    setOperationAction(ISD::OR,   VT, Expand);
+    setOperationAction(ISD::SHL,  VT, Expand);
+    setOperationAction(ISD::SRL,  VT, Expand);
+    setOperationAction(ISD::SRA,  VT, Expand);
+    setOperationAction(ISD::SUB,  VT, Expand);
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::XOR,  VT, Expand);
+  }
  }
  
  //===---------------------------------------------------------------------===//
  // TargetLowering Callbacks
  //===---------------------------------------------------------------------===//
  
-SDValue AMDGPUTargetLowering::LowerFormalArguments(
-                                      SDValue Chain,
-                                      CallingConv::ID CallConv,
-                                      bool isVarArg,
-                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                      DebugLoc DL, SelectionDAG &DAG,
-                                      SmallVectorImpl<SDValue> &InVals) const {
-  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
-    InVals.push_back(SDValue());
-  }
-  return Chain;
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+                             const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
  }
  
  SDValue AMDGPUTargetLowering::LowerReturn(
@@ -83,7 +111,7 @@ SDValue AMDGPUTargetLowering::LowerReturn(
                                       bool isVarArg,
                                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                                       const SmallVectorImpl<SDValue> &OutVals,
-                                     DebugLoc DL, SelectionDAG &DAG) const {
+                                     SDLoc DL, SelectionDAG &DAG) const {
    return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
  }
  
@@ -111,10 +139,30 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
    return Op;
  }
  
+SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
+                                                 SDValue Op,
+                                                 SelectionDAG &DAG) const {
+
+  const DataLayout *TD = getTargetMachine().getDataLayout();
+  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+  // XXX: Meaning of G->getOffset() is unclear; only zero is supported for now.
+  assert(G->getOffset() == 0 &&
+         "Do not know what to do with an non-zero offset");
+
+  unsigned Offset = MFI->LDSSize;
+  const GlobalValue *GV = G->getGlobal();
+  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+  // XXX: Size is added unpadded; the global's alignment is not honored yet.
+  MFI->LDSSize += Size;
+
+  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+}
+
  SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      SelectionDAG &DAG) const {
    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    switch (IntrinsicID) {
@@ -154,7 +202,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
      SelectionDAG &DAG) const {
  
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                Op.getOperand(1));
@@ -166,7 +214,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
  /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
  SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                  DAG.getConstantFP(1.0f, MVT::f32),
@@ -181,7 +229,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
  /// \brief Generate Min/Max node
  SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    SDValue LHS = Op.getOperand(0);
@@ -242,7 +290,7 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
  
  SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    SDValue Num = Op.getOperand(0);