lib/Target/R600/AMDILISelDAGToDAG.cpp

   1 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Defines an instruction selector for the AMDGPU target.
  12 //
  13 //===----------------------------------------------------------------------===//
  14 #include "AMDGPUInstrInfo.h"
  15 #include "AMDGPUISelLowering.h" // For AMDGPUISD
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDILDevices.h"
  18 #include "R600InstrInfo.h"
  19 #include "SIISelLowering.h"
  20 #include "llvm/ADT/ValueMap.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/Support/Compiler.h"
  26 #include <list>
  27 #include <queue>
  28
  29 using namespace llvm;
  30
  31 //===----------------------------------------------------------------------===//
  32 // Instruction Selector Implementation
  33 //===----------------------------------------------------------------------===//
  34
  35 namespace {
  36 /// AMDGPU specific code to select AMDGPU machine instructions for
  37 /// SelectionDAG operations.
  38 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  39   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  40   // make the right decision when generating code for different targets.
  41   const AMDGPUSubtarget &Subtarget;
  42 public:
  43   AMDGPUDAGToDAGISel(TargetMachine &TM);
  44   virtual ~AMDGPUDAGToDAGISel();
  45
  46   SDNode *Select(SDNode *N);
  47   virtual const char *getPassName() const;
  48   virtual void PostprocessISelDAG();
  49
  50 private:
  51   inline SDValue getSmallIPtrImm(unsigned Imm);
  52   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
  53                    const R600InstrInfo *TII, std::vector<unsigned> Cst);
  54   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  55   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  56
  57   // Complex pattern selectors
  58   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  59   bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  60   bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
  61
  62   static bool checkType(const Value *ptr, unsigned int addrspace);
  63   static const Value *getBasePointerValue(const Value *V);
  64
  65   static bool isGlobalStore(const StoreSDNode *N);
  66   static bool isPrivateStore(const StoreSDNode *N);
  67   static bool isLocalStore(const StoreSDNode *N);
  68   static bool isRegionStore(const StoreSDNode *N);
  69
  70   static bool isCPLoad(const LoadSDNode *N);
  71   static bool isConstantLoad(const LoadSDNode *N, int cbID);
  72   static bool isGlobalLoad(const LoadSDNode *N);
  73   static bool isParamLoad(const LoadSDNode *N);
  74   static bool isPrivateLoad(const LoadSDNode *N);
  75   static bool isLocalLoad(const LoadSDNode *N);
  76   static bool isRegionLoad(const LoadSDNode *N);
  77
  78   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  79   bool SelectGlobalValueVariableOffset(SDValue Addr,
  80       SDValue &BaseReg, SDValue& Offset);
  81   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  82   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  83
  84   // Include the pieces autogenerated from the target description.
  85 #include "AMDGPUGenDAGISel.inc"
  86 };
  87 }  // end anonymous namespace
  88
  89 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
  90 // DAG, ready for instruction scheduling.
  91 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
  92                                        ) {
  93   return new AMDGPUDAGToDAGISel(TM);
  94 }
  95
  96 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
  97                                      )
  98   : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
  99 }
 100
 101 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
 102 }
 103
 104 SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
 105   return CurDAG->getTargetConstant(Imm, MVT::i32);
 106 }
 107
 108 bool AMDGPUDAGToDAGISel::SelectADDRParam(
 109     SDValue Addr, SDValue& R1, SDValue& R2) {
 110
 111   if (Addr.getOpcode() == ISD::FrameIndex) {
 112     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 113       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
 114       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 115     } else {
 116       R1 = Addr;
 117       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 118     }
 119   } else if (Addr.getOpcode() == ISD::ADD) {
 120     R1 = Addr.getOperand(0);
 121     R2 = Addr.getOperand(1);
 122   } else {
 123     R1 = Addr;
 124     R2 = CurDAG->getTargetConstant(0, MVT::i32);
 125   }
 126   return true;
 127 }
 128
 129 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
 130   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 131       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 132     return false;
 133   }
 134   return SelectADDRParam(Addr, R1, R2);
 135 }
 136
 137
 138 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
 139   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 140       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 141     return false;
 142   }
 143
 144   if (Addr.getOpcode() == ISD::FrameIndex) {
 145     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 146       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
 147       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 148     } else {
 149       R1 = Addr;
 150       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 151     }
 152   } else if (Addr.getOpcode() == ISD::ADD) {
 153     R1 = Addr.getOperand(0);
 154     R2 = Addr.getOperand(1);
 155   } else {
 156     R1 = Addr;
 157     R2 = CurDAG->getTargetConstant(0, MVT::i64);
 158   }
 159   return true;
 160 }
 161
 162 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 163   unsigned int Opc = N->getOpcode();
 164   if (N->isMachineOpcode()) {
 165     return NULL;   // Already selected.
 166   }
 167   switch (Opc) {
 168   default: break;
 169   case ISD::BUILD_VECTOR: {
 170     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 171     if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
 172       break;
 173     }
 174     // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
 175     // that adds a 128 bits reg copy when going through TwoAddressInstructions
 176     // pass. We want to avoid 128 bits copies as much as possible because they
 177     // can't be bundled by our scheduler.
 178     SDValue RegSeqArgs[9] = {
 179       CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
 180       SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
 181       SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
 182       SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
 183       SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
 184     };
 185     bool IsRegSeq = true;
 186     for (unsigned i = 0; i < N->getNumOperands(); i++) {
 187       if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
 188         IsRegSeq = false;
 189         break;
 190       }
 191       RegSeqArgs[2 * i + 1] = N->getOperand(i);
 192     }
 193     if (!IsRegSeq)
 194       break;
 195     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
 196         RegSeqArgs, 2 * N->getNumOperands() + 1);
 197   }
 198   case ISD::BUILD_PAIR: {
 199     SDValue RC, SubReg0, SubReg1;
 200     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 201     if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
 202       break;
 203     }
 204     if (N->getValueType(0) == MVT::i128) {
 205       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
 206       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
 207       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
 208     } else if (N->getValueType(0) == MVT::i64) {
 209       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
 210       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
 211       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
 212     } else {
 213       llvm_unreachable("Unhandled value type for BUILD_PAIR");
 214     }
 215     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
 216                             N->getOperand(1), SubReg1 };
 217     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
 218                                   SDLoc(N), N->getValueType(0), Ops);
 219   }
 220
 221   case ISD::ConstantFP:
 222   case ISD::Constant: {
 223     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 224     // XXX: Custom immediate lowering not implemented yet.  Instead we use
 225     // pseudo instructions defined in SIInstructions.td
 226     if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
 227       break;
 228     }
 229     const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 230
 231     uint64_t ImmValue = 0;
 232     unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
 233
 234     if (N->getOpcode() == ISD::ConstantFP) {
 235       // XXX: 64-bit Immediates not supported yet
 236       assert(N->getValueType(0) != MVT::f64);
 237
 238       ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
 239       APFloat Value = C->getValueAPF();
 240       float FloatValue = Value.convertToFloat();
 241       if (FloatValue == 0.0) {
 242         ImmReg = AMDGPU::ZERO;
 243       } else if (FloatValue == 0.5) {
 244         ImmReg = AMDGPU::HALF;
 245       } else if (FloatValue == 1.0) {
 246         ImmReg = AMDGPU::ONE;
 247       } else {
 248         ImmValue = Value.bitcastToAPInt().getZExtValue();
 249       }
 250     } else {
 251       // XXX: 64-bit Immediates not supported yet
 252       assert(N->getValueType(0) != MVT::i64);
 253
 254       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
 255       if (C->getZExtValue() == 0) {
 256         ImmReg = AMDGPU::ZERO;
 257       } else if (C->getZExtValue() == 1) {
 258         ImmReg = AMDGPU::ONE_INT;
 259       } else {
 260         ImmValue = C->getZExtValue();
 261       }
 262     }
 263
 264     for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
 265                               Use != SDNode::use_end(); Use = Next) {
 266       Next = llvm::next(Use);
 267       std::vector<SDValue> Ops;
 268       for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
 269         Ops.push_back(Use->getOperand(i));
 270       }
 271
 272       if (!Use->isMachineOpcode()) {
 273           if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 274             // We can only use literal constants (e.g. AMDGPU::ZERO,
 275             // AMDGPU::ONE, etc) in machine opcodes.
 276             continue;
 277           }
 278       } else {
 279         if (!TII->isALUInstr(Use->getMachineOpcode()) ||
 280             (TII->get(Use->getMachineOpcode()).TSFlags &
 281             R600_InstFlag::VECTOR)) {
 282           continue;
 283         }
 284
 285         int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
 286         assert(ImmIdx != -1);
 287
 288         // subtract one from ImmIdx, because the DST operand is usually index
 289         // 0 for MachineInstrs, but we have no DST in the Ops vector.
 290         ImmIdx--;
 291
 292         // Check that we aren't already using an immediate.
 293         // XXX: It's possible for an instruction to have more than one
 294         // immediate operand, but this is not supported yet.
 295         if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 296           ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
 297           assert(C);
 298
 299           if (C->getZExtValue() != 0) {
 300             // This instruction is already using an immediate.
 301             continue;
 302           }
 303
 304           // Set the immediate value
 305           Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
 306         }
 307       }
 308       // Set the immediate register
 309       Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
 310
 311       CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
 312     }
 313     break;
 314   }
 315   }
 316   SDNode *Result = SelectCode(N);
 317
 318   // Fold operands of selected node
 319
 320   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 321   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
 322     const R600InstrInfo *TII =
 323         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 324     if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
 325       bool IsModified = false;
 326       do {
 327         std::vector<SDValue> Ops;
 328         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 329             I != E; ++I)
 330           Ops.push_back(*I);
 331         IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
 332         if (IsModified) {
 333           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 334         }
 335       } while (IsModified);
 336
 337     }
 338     if (Result && Result->isMachineOpcode() &&
 339         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
 340         && TII->isALUInstr(Result->getMachineOpcode())) {
 341       // Fold FNEG/FABS/CONST_ADDRESS
 342       // TODO: Isel can generate multiple MachineInst, we need to recursively
 343       // parse Result
 344       bool IsModified = false;
 345       do {
 346         std::vector<SDValue> Ops;
 347         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 348             I != E; ++I)
 349           Ops.push_back(*I);
 350         IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
 351         if (IsModified) {
 352           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 353         }
 354       } while (IsModified);
 355
 356       // If node has a single use which is CLAMP_R600, folds it
 357       if (Result->hasOneUse() && Result->isMachineOpcode()) {
 358         SDNode *PotentialClamp = *Result->use_begin();
 359         if (PotentialClamp->isMachineOpcode() &&
 360             PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
 361           unsigned ClampIdx =
 362             TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
 363           std::vector<SDValue> Ops;
 364           unsigned NumOp = Result->getNumOperands();
 365           for (unsigned i = 0; i < NumOp; ++i) {
 366             Ops.push_back(Result->getOperand(i));
 367           }
 368           Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
 369           Result = CurDAG->SelectNodeTo(PotentialClamp,
 370               Result->getMachineOpcode(), PotentialClamp->getVTList(),
 371               Ops.data(), NumOp);
 372         }
 373       }
 374     }
 375   }
 376
 377   return Result;
 378 }
 379
 380 bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
 381                                      SDValue &Abs, const R600InstrInfo *TII,
 382                                      std::vector<unsigned> Consts) {
 383   switch (Src.getOpcode()) {
 384   case AMDGPUISD::CONST_ADDRESS: {
 385     SDValue CstOffset;
 386     if (Src.getValueType().isVector() ||
 387         !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
 388       return false;
 389
 390     ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
 391     Consts.push_back(Cst->getZExtValue());
 392     if (!TII->fitsConstReadLimitations(Consts))
 393       return false;
 394
 395     Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
 396     Sel = CstOffset;
 397     return true;
 398     }
 399   case ISD::FNEG:
 400     Src = Src.getOperand(0);
 401     Neg = CurDAG->getTargetConstant(1, MVT::i32);
 402     return true;
 403   case ISD::FABS:
 404     if (!Abs.getNode())
 405       return false;
 406     Src = Src.getOperand(0);
 407     Abs = CurDAG->getTargetConstant(1, MVT::i32);
 408     return true;
 409   case ISD::BITCAST:
 410     Src = Src.getOperand(0);
 411     return true;
 412   default:
 413     return false;
 414   }
 415 }
 416
 417 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 418     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 419   int OperandIdx[] = {
 420     TII->getOperandIdx(Opcode, R600Operands::SRC0),
 421     TII->getOperandIdx(Opcode, R600Operands::SRC1),
 422     TII->getOperandIdx(Opcode, R600Operands::SRC2)
 423   };
 424   int SelIdx[] = {
 425     TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
 426     TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
 427     TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
 428   };
 429   int NegIdx[] = {
 430     TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
 431     TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
 432     TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
 433   };
 434   int AbsIdx[] = {
 435     TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
 436     TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
 437     -1
 438   };
 439
 440   // Gather constants values
 441   std::vector<unsigned> Consts;
 442   for (unsigned j = 0; j < 3; j++) {
 443     int SrcIdx = OperandIdx[j];
 444     if (SrcIdx < 0)
 445       break;
 446     if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
 447       if (Reg->getReg() == AMDGPU::ALU_CONST) {
 448         ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
 449         Consts.push_back(Cst->getZExtValue());
 450       }
 451     }
 452   }
 453
 454   for (unsigned i = 0; i < 3; i++) {
 455     if (OperandIdx[i] < 0)
 456       return false;
 457     SDValue &Src = Ops[OperandIdx[i] - 1];
 458     SDValue &Sel = Ops[SelIdx[i] - 1];
 459     SDValue &Neg = Ops[NegIdx[i] - 1];
 460     SDValue FakeAbs;
 461     SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
 462     if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
 463       return true;
 464   }
 465   return false;
 466 }
 467
 468 bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
 469     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 470   int OperandIdx[] = {
 471     TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
 472     TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
 473     TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
 474     TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
 475     TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
 476     TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
 477     TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
 478     TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
 479   };
 480   int SelIdx[] = {
 481     TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
 482     TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
 483     TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
 484     TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
 485     TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
 486     TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
 487     TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
 488     TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
 489   };
 490   int NegIdx[] = {
 491     TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
 492     TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
 493     TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
 494     TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
 495     TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
 496     TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
 497     TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
 498     TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
 499   };
 500   int AbsIdx[] = {
 501     TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
 502     TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
 503     TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
 504     TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
 505     TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
 506     TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
 507     TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
 508     TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
 509   };
 510
 511   // Gather constants values
 512   std::vector<unsigned> Consts;
 513   for (unsigned j = 0; j < 8; j++) {
 514     int SrcIdx = OperandIdx[j];
 515     if (SrcIdx < 0)
 516       break;
 517     if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
 518       if (Reg->getReg() == AMDGPU::ALU_CONST) {
 519         ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
 520         Consts.push_back(Cst->getZExtValue());
 521       }
 522     }
 523   }
 524
 525   for (unsigned i = 0; i < 8; i++) {
 526     if (OperandIdx[i] < 0)
 527       return false;
 528     SDValue &Src = Ops[OperandIdx[i] - 1];
 529     SDValue &Sel = Ops[SelIdx[i] - 1];
 530     SDValue &Neg = Ops[NegIdx[i] - 1];
 531     SDValue &Abs = Ops[AbsIdx[i] - 1];
 532     if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
 533       return true;
 534   }
 535   return false;
 536 }
 537
 538 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
 539   if (!ptr) {
 540     return false;
 541   }
 542   Type *ptrType = ptr->getType();
 543   return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
 544 }
 545
 546 const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
 547   if (!V) {
 548     return NULL;
 549   }
 550   const Value *ret = NULL;
 551   ValueMap<const Value *, bool> ValueBitMap;
 552   std::queue<const Value *, std::list<const Value *> > ValueQueue;
 553   ValueQueue.push(V);
 554   while (!ValueQueue.empty()) {
 555     V = ValueQueue.front();
 556     if (ValueBitMap.find(V) == ValueBitMap.end()) {
 557       ValueBitMap[V] = true;
 558       if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
 559         ret = V;
 560         break;
 561       } else if (dyn_cast<GlobalVariable>(V)) {
 562         ret = V;
 563         break;
 564       } else if (dyn_cast<Constant>(V)) {
 565         const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
 566         if (CE) {
 567           ValueQueue.push(CE->getOperand(0));
 568         }
 569       } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
 570         ret = AI;
 571         break;
 572       } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
 573         uint32_t numOps = I->getNumOperands();
 574         for (uint32_t x = 0; x < numOps; ++x) {
 575           ValueQueue.push(I->getOperand(x));
 576         }
 577       } else {
 578         assert(!"Found a Value that we didn't know how to handle!");
 579       }
 580     }
 581     ValueQueue.pop();
 582   }
 583   return ret;
 584 }
 585
 586 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
 587   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 588 }
 589
 590 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
 591   return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 592           && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 593           && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
 594 }
 595
 596 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
 597   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 598 }
 599
 600 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
 601   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 602 }
 603
 604 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
 605   if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
 606     return true;
 607   }
 608   MachineMemOperand *MMO = N->getMemOperand();
 609   const Value *V = MMO->getValue();
 610   const Value *BV = getBasePointerValue(V);
 611   if (MMO
 612       && MMO->getValue()
 613       && ((V && dyn_cast<GlobalValue>(V))
 614           || (BV && dyn_cast<GlobalValue>(
 615                         getBasePointerValue(MMO->getValue()))))) {
 616     return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
 617   } else {
 618     return false;
 619   }
 620 }
 621
 622 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
 623   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 624 }
 625
 626 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
 627   return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
 628 }
 629
 630 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) {
 631   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 632 }
 633
 634 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) {
 635   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 636 }
 637
 638 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
 639   MachineMemOperand *MMO = N->getMemOperand();
 640   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 641     if (MMO) {
 642       const Value *V = MMO->getValue();
 643       const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
 644       if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
 645         return true;
 646       }
 647     }
 648   }
 649   return false;
 650 }
 651
 652 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
 653   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 654     // Check to make sure we are not a constant pool load or a constant load
 655     // that is marked as a private load
 656     if (isCPLoad(N) || isConstantLoad(N, -1)) {
 657       return false;
 658     }
 659   }
 660   if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 661       && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 662       && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
 663       && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
 664       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
 665       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
 666     return true;
 667   }
 668   return false;
 669 }
 670
 671 const char *AMDGPUDAGToDAGISel::getPassName() const {
 672   return "AMDGPU DAG->DAG Pattern Instruction Selection";
 673 }
 674
 675 #ifdef DEBUGTMP
 676 #undef INT64_C
 677 #endif
 678 #undef DEBUGTMP
 679
 680 ///==== AMDGPU Functions ====///
 681
 682 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
 683     SDValue& IntPtr) {
 684   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
 685     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
 686     return true;
 687   }
 688   return false;
 689 }
 690
 691 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
 692     SDValue& BaseReg, SDValue &Offset) {
 693   if (!dyn_cast<ConstantSDNode>(Addr)) {
 694     BaseReg = Addr;
 695     Offset = CurDAG->getIntPtrConstant(0, true);
 696     return true;
 697   }
 698   return false;
 699 }
 700
 701 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 702                                            SDValue &Offset) {
 703   ConstantSDNode * IMMOffset;
 704
 705   if (Addr.getOpcode() == ISD::ADD
 706       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
 707       && isInt<16>(IMMOffset->getZExtValue())) {
 708
 709       Base = Addr.getOperand(0);
 710       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 711       return true;
 712   // If the pointer address is constant, we can move it to the offset field.
 713   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
 714              && isInt<16>(IMMOffset->getZExtValue())) {
 715     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 716                                   SDLoc(CurDAG->getEntryNode()),
 717                                   AMDGPU::ZERO, MVT::i32);
 718     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 719     return true;
 720   }
 721
 722   // Default case, no offset
 723   Base = Addr;
 724   Offset = CurDAG->getTargetConstant(0, MVT::i32);
 725   return true;
 726 }
 727
 728 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 729                                             SDValue &Offset) {
 730   ConstantSDNode *C;
 731
 732   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
 733     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
 734     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 735   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
 736             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
 737     Base = Addr.getOperand(0);
 738     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 739   } else {
 740     Base = Addr;
 741     Offset = CurDAG->getTargetConstant(0, MVT::i32);
 742   }
 743
 744   return true;
 745 }
 746
 747 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 748
 749   if (Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX) {
 750     return;
 751   }
 752
 753   // Go over all selected nodes and try to fold them a bit more
 754   const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
 755   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 756        E = CurDAG->allnodes_end(); I != E; ++I) {
 757
 758     SDNode *Node = I;
 759     switch (Node->getOpcode()) {
 760     // Fix the register class in copy to CopyToReg nodes - ISel will always
 761     // use SReg classes for 64-bit copies, but this is not always what we want.
 762     case ISD::CopyToReg: {
 763       unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
 764       SDValue Val = Node->getOperand(2);
 765       const TargetRegisterClass *RC = RegInfo->getRegClass(Reg);
 766       if (RC != &AMDGPU::SReg_64RegClass) {
 767         continue;
 768       }
 769
 770       if (!Val.getNode()->isMachineOpcode()) {
 771         continue;
 772       }
 773
 774       const MCInstrDesc Desc = TM.getInstrInfo()->get(Val.getNode()->getMachineOpcode());
 775       const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 776       RegInfo->setRegClass(Reg, TRI->getRegClass(Desc.OpInfo[0].RegClass));
 777       continue;
 778     }
 779     }
 780
 781     MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
 782     if (!MachineNode)
 783       continue;
 784
 785     SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
 786     if (ResNode != Node) {
 787       ReplaceUses(Node, ResNode);
 788     }
 789   }
 790 }