Don't use a potentially expensive shift if all we want is one set bit.

[oota-llvm.git] / lib / Target / R600 / AMDGPUISelLowering.cpp
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index a266df535d56b46aa80733232b3c93f3fb82c55e..7fad3bbc6c8c3dc312472507b50332710fbd21fa 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -14,14 +14,17 @@
  //===----------------------------------------------------------------------===//
  
  #include "AMDGPUISelLowering.h"
+#include "AMDGPU.h"
  #include "AMDGPURegisterInfo.h"
-#include "AMDILIntrinsicInfo.h"
  #include "AMDGPUSubtarget.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIMachineFunctionInfo.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DataLayout.h"
  
  using namespace llvm;
  
@@ -46,6 +49,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  
+  // The hardware supports ROTR, but not ROTL
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+
    // Lower floating point store/load to integer store/load to reduce the number
    // of patterns in tablegen.
    setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -65,6 +71,28 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+  int types[] = {
+    (int)MVT::v2i32,
+    (int)MVT::v4i32
+  };
+  size_t NumTypes = sizeof(types) / sizeof(*types);
+
+  for (unsigned int x  = 0; x < NumTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+    //Expand the following operations for the current type by default
+    setOperationAction(ISD::ADD,  VT, Expand);
+    setOperationAction(ISD::AND,  VT, Expand);
+    setOperationAction(ISD::MUL,  VT, Expand);
+    setOperationAction(ISD::OR,   VT, Expand);
+    setOperationAction(ISD::SHL,  VT, Expand);
+    setOperationAction(ISD::SRL,  VT, Expand);
+    setOperationAction(ISD::SRA,  VT, Expand);
+    setOperationAction(ISD::SUB,  VT, Expand);
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::XOR,  VT, Expand);
+  }
  }
  
  //===---------------------------------------------------------------------===//
@@ -83,7 +111,7 @@ SDValue AMDGPUTargetLowering::LowerReturn(
                                       bool isVarArg,
                                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                                       const SmallVectorImpl<SDValue> &OutVals,
-                                     DebugLoc DL, SelectionDAG &DAG) const {
+                                     SDLoc DL, SelectionDAG &DAG) const {
    return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
  }
  
@@ -111,10 +139,30 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
    return Op;
  }
  
+SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
+                                                 SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  // Returns the current MFI->LDSSize as a constant, then grows LDSSize.
+  const DataLayout *TD = getTargetMachine().getDataLayout();
+  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+  // XXX: The meaning of G->getOffset() is unclear; only zero is handled.
+  assert(G->getOffset() == 0 &&
+         "Do not know what to do with an non-zero offset");
+  // Offset is the LDSSize total from before this global is added to it.
+  unsigned Offset = MFI->LDSSize;
+  const GlobalValue *GV = G->getGlobal();
+  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+  // NOTE(review): each call adds Size, even if this global was seen before.
+  // XXX: Alignment is not accounted for; Size is added to LDSSize as-is.
+  MFI->LDSSize += Size;
+  // The constant's type is i64 when the pointer size is 8, otherwise i32.
+  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+}
+
  SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      SelectionDAG &DAG) const {
    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    switch (IntrinsicID) {
@@ -154,7 +202,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
      SelectionDAG &DAG) const {
  
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                Op.getOperand(1));
@@ -166,7 +214,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
  /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
  SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                  DAG.getConstantFP(1.0f, MVT::f32),
@@ -181,7 +229,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
  /// \brief Generate Min/Max node
  SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    SDValue LHS = Op.getOperand(0);
@@ -242,7 +290,7 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
  
  SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
      SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT VT = Op.getValueType();
  
    SDValue Num = Op.getOperand(0);