lib/Target/R600/AMDGPUISelDAGToDAG.cpp

//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Defines an instruction selector for the AMDGPU target.
  12 //
  13 //===----------------------------------------------------------------------===//
  14 #include "AMDGPUInstrInfo.h"
  15 #include "AMDGPUISelLowering.h" // For AMDGPUISD
  16 #include "AMDGPURegisterInfo.h"
  17 #include "R600InstrInfo.h"
  18 #include "SIISelLowering.h"
  19 #include "llvm/ADT/ValueMap.h"
  20 #include "llvm/Analysis/ValueTracking.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/Support/Compiler.h"
  26 #include <list>
  27 #include <queue>
  28
  29 using namespace llvm;
  30
  31 //===----------------------------------------------------------------------===//
  32 // Instruction Selector Implementation
  33 //===----------------------------------------------------------------------===//
  34
  35 namespace {
  36 /// AMDGPU specific code to select AMDGPU machine instructions for
  37 /// SelectionDAG operations.
  38 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  39   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  40   // make the right decision when generating code for different targets.
  41   const AMDGPUSubtarget &Subtarget;
  42 public:
  43   AMDGPUDAGToDAGISel(TargetMachine &TM);
  44   virtual ~AMDGPUDAGToDAGISel();
  45
  46   SDNode *Select(SDNode *N);
  47   virtual const char *getPassName() const;
  48   virtual void PostprocessISelDAG();
  49
  50 private:
  51   inline SDValue getSmallIPtrImm(unsigned Imm);
  52   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
  53                    const R600InstrInfo *TII);
  54   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  55   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  56
  57   // Complex pattern selectors
  58   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  59   bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  60   bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
  61   SDValue SimplifyI24(SDValue &Op);
  62   bool SelectI24(SDValue Addr, SDValue &Op);
  63   bool SelectU24(SDValue Addr, SDValue &Op);
  64
  65   static bool checkType(const Value *ptr, unsigned int addrspace);
  66
  67   static bool isGlobalStore(const StoreSDNode *N);
  68   static bool isPrivateStore(const StoreSDNode *N);
  69   static bool isLocalStore(const StoreSDNode *N);
  70   static bool isRegionStore(const StoreSDNode *N);
  71
  72   bool isCPLoad(const LoadSDNode *N) const;
  73   bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  74   bool isGlobalLoad(const LoadSDNode *N) const;
  75   bool isParamLoad(const LoadSDNode *N) const;
  76   bool isPrivateLoad(const LoadSDNode *N) const;
  77   bool isLocalLoad(const LoadSDNode *N) const;
  78   bool isRegionLoad(const LoadSDNode *N) const;
  79
  80   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  81   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  82   bool SelectGlobalValueVariableOffset(SDValue Addr,
  83       SDValue &BaseReg, SDValue& Offset);
  84   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  85   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  86
  87   // Include the pieces autogenerated from the target description.
  88 #include "AMDGPUGenDAGISel.inc"
  89 };
  90 }  // end anonymous namespace
  91
  92 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
  93 // DAG, ready for instruction scheduling.
  94 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
  95                                        ) {
  96   return new AMDGPUDAGToDAGISel(TM);
  97 }
  98
  99 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
 100   : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
 101 }
 102
 103 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
 104 }
 105
 106 /// \brief Determine the register class for \p OpNo
 107 /// \returns The register class of the virtual register that will be used for
 108 /// the given operand number \OpNo or NULL if the register class cannot be
 109 /// determined.
 110 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
 111                                                           unsigned OpNo) const {
 112   if (!N->isMachineOpcode()) {
 113     return NULL;
 114   }
 115   switch (N->getMachineOpcode()) {
 116   default: {
 117     const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
 118     unsigned OpIdx = Desc.getNumDefs() + OpNo;
 119     if (OpIdx >= Desc.getNumOperands())
 120       return NULL;
 121     int RegClass = Desc.OpInfo[OpIdx].RegClass;
 122     if (RegClass == -1) {
 123       return NULL;
 124     }
 125     return TM.getRegisterInfo()->getRegClass(RegClass);
 126   }
 127   case AMDGPU::REG_SEQUENCE: {
 128     const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(
 129                       cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
 130     unsigned SubRegIdx =
 131             dyn_cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue();
 132     return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
 133   }
 134   }
 135 }
 136
 137 SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
 138   return CurDAG->getTargetConstant(Imm, MVT::i32);
 139 }
 140
 141 bool AMDGPUDAGToDAGISel::SelectADDRParam(
 142     SDValue Addr, SDValue& R1, SDValue& R2) {
 143
 144   if (Addr.getOpcode() == ISD::FrameIndex) {
 145     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 146       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
 147       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 148     } else {
 149       R1 = Addr;
 150       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 151     }
 152   } else if (Addr.getOpcode() == ISD::ADD) {
 153     R1 = Addr.getOperand(0);
 154     R2 = Addr.getOperand(1);
 155   } else {
 156     R1 = Addr;
 157     R2 = CurDAG->getTargetConstant(0, MVT::i32);
 158   }
 159   return true;
 160 }
 161
 162 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
 163   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 164       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 165     return false;
 166   }
 167   return SelectADDRParam(Addr, R1, R2);
 168 }
 169
 170
 171 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
 172   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 173       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 174     return false;
 175   }
 176
 177   if (Addr.getOpcode() == ISD::FrameIndex) {
 178     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 179       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
 180       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 181     } else {
 182       R1 = Addr;
 183       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 184     }
 185   } else if (Addr.getOpcode() == ISD::ADD) {
 186     R1 = Addr.getOperand(0);
 187     R2 = Addr.getOperand(1);
 188   } else {
 189     R1 = Addr;
 190     R2 = CurDAG->getTargetConstant(0, MVT::i64);
 191   }
 192   return true;
 193 }
 194
 195 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 196   const R600InstrInfo *TII =
 197                       static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 198   unsigned int Opc = N->getOpcode();
 199   if (N->isMachineOpcode()) {
 200     return NULL;   // Already selected.
 201   }
 202   switch (Opc) {
 203   default: break;
 204   case AMDGPUISD::CONST_ADDRESS: {
 205     for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
 206                               I != SDNode::use_end(); I = Next) {
 207       Next = llvm::next(I);
 208       if (!I->isMachineOpcode()) {
 209         continue;
 210       }
 211       unsigned Opcode = I->getMachineOpcode();
 212       bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
 213       int SrcIdx = I.getOperandNo();
 214       int SelIdx;
 215       // Unlike MachineInstrs, SDNodes do not have results in their operand
 216       // list, so we need to increment the SrcIdx, since
 217       // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
 218       if (HasDst) {
 219         SrcIdx++;
 220       }
 221
 222       SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
 223       if (SelIdx < 0) {
 224         continue;
 225       }
 226
 227       SDValue CstOffset;
 228       if (N->getValueType(0).isVector() ||
 229           !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
 230         continue;
 231
 232       // Gather constants values
 233       int SrcIndices[] = {
 234         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
 235         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
 236         TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
 237         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
 238         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
 239         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
 240         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
 241         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
 242         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
 243         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
 244         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
 245       };
 246       std::vector<unsigned> Consts;
 247       for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
 248         int OtherSrcIdx = SrcIndices[i];
 249         int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
 250         if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
 251           continue;
 252         }
 253         if (HasDst) {
 254           OtherSrcIdx--;
 255           OtherSelIdx--;
 256         }
 257         if (RegisterSDNode *Reg =
 258                          dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
 259           if (Reg->getReg() == AMDGPU::ALU_CONST) {
 260             ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
 261             Consts.push_back(Cst->getZExtValue());
 262           }
 263         }
 264       }
 265
 266       ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
 267       Consts.push_back(Cst->getZExtValue());
 268       if (!TII->fitsConstReadLimitations(Consts))
 269         continue;
 270
 271       // Convert back to SDNode indices
 272       if (HasDst) {
 273         SrcIdx--;
 274         SelIdx--;
 275       }
 276       std::vector<SDValue> Ops;
 277       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
 278         if (i == SrcIdx) {
 279           Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
 280         } else if (i == SelIdx) {
 281           Ops.push_back(CstOffset);
 282         } else {
 283           Ops.push_back(I->getOperand(i));
 284         }
 285       }
 286       CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
 287     }
 288     break;
 289   }
 290   case ISD::BUILD_VECTOR: {
 291     unsigned RegClassID;
 292     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 293     const AMDGPURegisterInfo *TRI =
 294                    static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
 295     const SIRegisterInfo *SIRI =
 296                    static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
 297     EVT VT = N->getValueType(0);
 298     unsigned NumVectorElts = VT.getVectorNumElements();
 299     assert(VT.getVectorElementType().bitsEq(MVT::i32));
 300     if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 301       bool UseVReg = true;
 302       for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
 303                                                     U != E; ++U) {
 304         if (!U->isMachineOpcode()) {
 305           continue;
 306         }
 307         const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
 308         if (!RC) {
 309           continue;
 310         }
 311         if (SIRI->isSGPRClass(RC)) {
 312           UseVReg = false;
 313         }
 314       }
 315       switch(NumVectorElts) {
 316       case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
 317                                      AMDGPU::SReg_32RegClassID;
 318         break;
 319       case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
 320                                      AMDGPU::SReg_64RegClassID;
 321         break;
 322       case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
 323                                      AMDGPU::SReg_128RegClassID;
 324         break;
 325       case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
 326                                      AMDGPU::SReg_256RegClassID;
 327         break;
 328       case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
 329                                       AMDGPU::SReg_512RegClassID;
 330         break;
 331       }
 332     } else {
 333       // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
 334       // that adds a 128 bits reg copy when going through TwoAddressInstructions
 335       // pass. We want to avoid 128 bits copies as much as possible because they
 336       // can't be bundled by our scheduler.
 337       switch(NumVectorElts) {
 338       case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
 339       case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
 340       default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
 341       }
 342     }
 343
 344     SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 345
 346     if (NumVectorElts == 1) {
 347       return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
 348                                   VT.getVectorElementType(),
 349                                   N->getOperand(0), RegClass);
 350     }
 351
 352     assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
 353                                   "supported yet");
 354     // 16 = Max Num Vector Elements
 355     // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
 356     // 1 = Vector Register Class
 357     SDValue RegSeqArgs[16 * 2 + 1];
 358
 359     RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 360     bool IsRegSeq = true;
 361     for (unsigned i = 0; i < N->getNumOperands(); i++) {
 362       // XXX: Why is this here?
 363       if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
 364         IsRegSeq = false;
 365         break;
 366       }
 367       RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
 368       RegSeqArgs[1 + (2 * i) + 1] =
 369               CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
 370     }
 371     if (!IsRegSeq)
 372       break;
 373     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
 374         RegSeqArgs, 2 * N->getNumOperands() + 1);
 375   }
 376   case ISD::BUILD_PAIR: {
 377     SDValue RC, SubReg0, SubReg1;
 378     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 379     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 380       break;
 381     }
 382     if (N->getValueType(0) == MVT::i128) {
 383       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
 384       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
 385       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
 386     } else if (N->getValueType(0) == MVT::i64) {
 387       RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
 388       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
 389       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
 390     } else {
 391       llvm_unreachable("Unhandled value type for BUILD_PAIR");
 392     }
 393     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
 394                             N->getOperand(1), SubReg1 };
 395     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
 396                                   SDLoc(N), N->getValueType(0), Ops);
 397   }
 398
 399   case ISD::ConstantFP:
 400   case ISD::Constant: {
 401     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 402     // XXX: Custom immediate lowering not implemented yet.  Instead we use
 403     // pseudo instructions defined in SIInstructions.td
 404     if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
 405       break;
 406     }
 407
 408     uint64_t ImmValue = 0;
 409     unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
 410
 411     if (N->getOpcode() == ISD::ConstantFP) {
 412       // XXX: 64-bit Immediates not supported yet
 413       assert(N->getValueType(0) != MVT::f64);
 414
 415       ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
 416       APFloat Value = C->getValueAPF();
 417       float FloatValue = Value.convertToFloat();
 418       if (FloatValue == 0.0) {
 419         ImmReg = AMDGPU::ZERO;
 420       } else if (FloatValue == 0.5) {
 421         ImmReg = AMDGPU::HALF;
 422       } else if (FloatValue == 1.0) {
 423         ImmReg = AMDGPU::ONE;
 424       } else {
 425         ImmValue = Value.bitcastToAPInt().getZExtValue();
 426       }
 427     } else {
 428       // XXX: 64-bit Immediates not supported yet
 429       assert(N->getValueType(0) != MVT::i64);
 430
 431       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
 432       if (C->getZExtValue() == 0) {
 433         ImmReg = AMDGPU::ZERO;
 434       } else if (C->getZExtValue() == 1) {
 435         ImmReg = AMDGPU::ONE_INT;
 436       } else {
 437         ImmValue = C->getZExtValue();
 438       }
 439     }
 440
 441     for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
 442                               Use != SDNode::use_end(); Use = Next) {
 443       Next = llvm::next(Use);
 444       std::vector<SDValue> Ops;
 445       for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
 446         Ops.push_back(Use->getOperand(i));
 447       }
 448
 449       if (!Use->isMachineOpcode()) {
 450           if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 451             // We can only use literal constants (e.g. AMDGPU::ZERO,
 452             // AMDGPU::ONE, etc) in machine opcodes.
 453             continue;
 454           }
 455       } else {
 456         switch(Use->getMachineOpcode()) {
 457         case AMDGPU::REG_SEQUENCE: break;
 458         default:
 459           if (!TII->isALUInstr(Use->getMachineOpcode()) ||
 460               (TII->get(Use->getMachineOpcode()).TSFlags &
 461                R600_InstFlag::VECTOR)) {
 462             continue;
 463           }
 464         }
 465
 466         // Check that we aren't already using an immediate.
 467         // XXX: It's possible for an instruction to have more than one
 468         // immediate operand, but this is not supported yet.
 469         if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 470           int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
 471                                           AMDGPU::OpName::literal);
 472           if (ImmIdx == -1) {
 473             continue;
 474           }
 475
 476           if (TII->getOperandIdx(Use->getMachineOpcode(),
 477                                  AMDGPU::OpName::dst) != -1) {
 478             // subtract one from ImmIdx, because the DST operand is usually index
 479             // 0 for MachineInstrs, but we have no DST in the Ops vector.
 480             ImmIdx--;
 481           }
 482           ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
 483           assert(C);
 484
 485           if (C->getZExtValue() != 0) {
 486             // This instruction is already using an immediate.
 487             continue;
 488           }
 489
 490           // Set the immediate value
 491           Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
 492         }
 493       }
 494       // Set the immediate register
 495       Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
 496
 497       CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
 498     }
 499     break;
 500   }
 501   }
 502   SDNode *Result = SelectCode(N);
 503
 504   // Fold operands of selected node
 505
 506   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 507   if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 508     const R600InstrInfo *TII =
 509         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 510     if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
 511       bool IsModified = false;
 512       do {
 513         std::vector<SDValue> Ops;
 514         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 515             I != E; ++I)
 516           Ops.push_back(*I);
 517         IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
 518         if (IsModified) {
 519           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 520         }
 521       } while (IsModified);
 522
 523     }
 524     if (Result && Result->isMachineOpcode() &&
 525         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
 526         && TII->hasInstrModifiers(Result->getMachineOpcode())) {
 527       // Fold FNEG/FABS
 528       // TODO: Isel can generate multiple MachineInst, we need to recursively
 529       // parse Result
 530       bool IsModified = false;
 531       do {
 532         std::vector<SDValue> Ops;
 533         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 534             I != E; ++I)
 535           Ops.push_back(*I);
 536         IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
 537         if (IsModified) {
 538           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 539         }
 540       } while (IsModified);
 541
 542       // If node has a single use which is CLAMP_R600, folds it
 543       if (Result->hasOneUse() && Result->isMachineOpcode()) {
 544         SDNode *PotentialClamp = *Result->use_begin();
 545         if (PotentialClamp->isMachineOpcode() &&
 546             PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
 547           unsigned ClampIdx =
 548             TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
 549           std::vector<SDValue> Ops;
 550           unsigned NumOp = Result->getNumOperands();
 551           for (unsigned i = 0; i < NumOp; ++i) {
 552             Ops.push_back(Result->getOperand(i));
 553           }
 554           Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
 555           Result = CurDAG->SelectNodeTo(PotentialClamp,
 556               Result->getMachineOpcode(), PotentialClamp->getVTList(),
 557               Ops.data(), NumOp);
 558         }
 559       }
 560     }
 561   }
 562
 563   return Result;
 564 }
 565
 566 bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
 567                                      SDValue &Abs, const R600InstrInfo *TII) {
 568   switch (Src.getOpcode()) {
 569   case ISD::FNEG:
 570     Src = Src.getOperand(0);
 571     Neg = CurDAG->getTargetConstant(1, MVT::i32);
 572     return true;
 573   case ISD::FABS:
 574     if (!Abs.getNode())
 575       return false;
 576     Src = Src.getOperand(0);
 577     Abs = CurDAG->getTargetConstant(1, MVT::i32);
 578     return true;
 579   case ISD::BITCAST:
 580     Src = Src.getOperand(0);
 581     return true;
 582   default:
 583     return false;
 584   }
 585 }
 586
 587 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 588     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 589   int OperandIdx[] = {
 590     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
 591     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
 592     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
 593   };
 594   int SelIdx[] = {
 595     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
 596     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
 597     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
 598   };
 599   int NegIdx[] = {
 600     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
 601     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
 602     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
 603   };
 604   int AbsIdx[] = {
 605     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
 606     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
 607     -1
 608   };
 609
 610
 611   for (unsigned i = 0; i < 3; i++) {
 612     if (OperandIdx[i] < 0)
 613       return false;
 614     SDValue &Src = Ops[OperandIdx[i] - 1];
 615     SDValue &Sel = Ops[SelIdx[i] - 1];
 616     SDValue &Neg = Ops[NegIdx[i] - 1];
 617     SDValue FakeAbs;
 618     SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
 619     if (FoldOperand(Src, Sel, Neg, Abs, TII))
 620       return true;
 621   }
 622   return false;
 623 }
 624
 625 bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
 626     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 627   int OperandIdx[] = {
 628     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
 629     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
 630     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
 631     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
 632     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
 633     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
 634     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
 635     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
 636   };
 637   int SelIdx[] = {
 638     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
 639     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
 640     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
 641     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
 642     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
 643     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
 644     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
 645     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
 646   };
 647   int NegIdx[] = {
 648     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
 649     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
 650     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
 651     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
 652     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
 653     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
 654     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
 655     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
 656   };
 657   int AbsIdx[] = {
 658     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
 659     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
 660     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
 661     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
 662     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
 663     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
 664     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
 665     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
 666   };
 667
 668   for (unsigned i = 0; i < 8; i++) {
 669     if (OperandIdx[i] < 0)
 670       return false;
 671     SDValue &Src = Ops[OperandIdx[i] - 1];
 672     SDValue &Sel = Ops[SelIdx[i] - 1];
 673     SDValue &Neg = Ops[NegIdx[i] - 1];
 674     SDValue &Abs = Ops[AbsIdx[i] - 1];
 675     if (FoldOperand(Src, Sel, Neg, Abs, TII))
 676       return true;
 677   }
 678   return false;
 679 }
 680
 681 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
 682   if (!ptr) {
 683     return false;
 684   }
 685   Type *ptrType = ptr->getType();
 686   return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
 687 }
 688
 689 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
 690   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 691 }
 692
 693 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
 694   return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 695           && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 696           && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
 697 }
 698
 699 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
 700   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 701 }
 702
 703 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
 704   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 705 }
 706
 707 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
 708   if (CbId == -1) {
 709     return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS);
 710   }
 711   return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
 712 }
 713
 714 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
 715   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
 716     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 717     if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
 718         N->getMemoryVT().bitsLT(MVT::i32)) {
 719       return true;
 720     }
 721   }
 722   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 723 }
 724
 725 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
 726   return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
 727 }
 728
 729 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
 730   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 731 }
 732
 733 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
 734   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 735 }
 736
 737 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
 738   MachineMemOperand *MMO = N->getMemOperand();
 739   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 740     if (MMO) {
 741       const Value *V = MMO->getValue();
 742       const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
 743       if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
 744         return true;
 745       }
 746     }
 747   }
 748   return false;
 749 }
 750
 751 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
 752   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 753     // Check to make sure we are not a constant pool load or a constant load
 754     // that is marked as a private load
 755     if (isCPLoad(N) || isConstantLoad(N, -1)) {
 756       return false;
 757     }
 758   }
 759   if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 760       && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 761       && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
 762       && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
 763       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
 764       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
 765     return true;
 766   }
 767   return false;
 768 }
 769
 770 const char *AMDGPUDAGToDAGISel::getPassName() const {
 771   return "AMDGPU DAG->DAG Pattern Instruction Selection";
 772 }
 773
// If DEBUGTMP is defined, drop the INT64_C macro; afterwards make sure
// DEBUGTMP itself is no longer defined.
// NOTE(review): DEBUGTMP is not defined anywhere in the visible part of this
// file - this looks like leftover cleanup; confirm before removing.
#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP
 778
 779 //===----------------------------------------------------------------------===//
 780 // Complex Patterns
 781 //===----------------------------------------------------------------------===//
 782
 783 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
 784     SDValue& IntPtr) {
 785   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
 786     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
 787     return true;
 788   }
 789   return false;
 790 }
 791
 792 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
 793     SDValue& BaseReg, SDValue &Offset) {
 794   if (!dyn_cast<ConstantSDNode>(Addr)) {
 795     BaseReg = Addr;
 796     Offset = CurDAG->getIntPtrConstant(0, true);
 797     return true;
 798   }
 799   return false;
 800 }
 801
 802 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 803                                            SDValue &Offset) {
 804   ConstantSDNode * IMMOffset;
 805
 806   if (Addr.getOpcode() == ISD::ADD
 807       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
 808       && isInt<16>(IMMOffset->getZExtValue())) {
 809
 810       Base = Addr.getOperand(0);
 811       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 812       return true;
 813   // If the pointer address is constant, we can move it to the offset field.
 814   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
 815              && isInt<16>(IMMOffset->getZExtValue())) {
 816     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 817                                   SDLoc(CurDAG->getEntryNode()),
 818                                   AMDGPU::ZERO, MVT::i32);
 819     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 820     return true;
 821   }
 822
 823   // Default case, no offset
 824   Base = Addr;
 825   Offset = CurDAG->getTargetConstant(0, MVT::i32);
 826   return true;
 827 }
 828
 829 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 830                                             SDValue &Offset) {
 831   ConstantSDNode *C;
 832
 833   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
 834     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
 835     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 836   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
 837             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
 838     Base = Addr.getOperand(0);
 839     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 840   } else {
 841     Base = Addr;
 842     Offset = CurDAG->getTargetConstant(0, MVT::i32);
 843   }
 844
 845   return true;
 846 }
 847
 848 SDValue AMDGPUDAGToDAGISel::SimplifyI24(SDValue &Op) {
 849   APInt Demanded = APInt(32, 0x00FFFFFF);
 850   APInt KnownZero, KnownOne;
 851   TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true);
 852   const TargetLowering *TLI = getTargetLowering();
 853   if (TLI->SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) {
 854     CurDAG->ReplaceAllUsesWith(Op, TLO.New);
 855     CurDAG->RepositionNode(Op.getNode(), TLO.New.getNode());
 856     return SimplifyI24(TLO.New);
 857   } else {
 858     return  Op;
 859   }
 860 }
 861
 862 bool AMDGPUDAGToDAGISel::SelectI24(SDValue Op, SDValue &I24) {
 863
 864   assert(Op.getValueType() == MVT::i32);
 865
 866   if (CurDAG->ComputeNumSignBits(Op) == 9) {
 867     I24 = SimplifyI24(Op);
 868     return true;
 869   }
 870   return false;
 871 }
 872
 873 bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
 874   APInt KnownZero;
 875   APInt KnownOne;
 876   CurDAG->ComputeMaskedBits(Op, KnownZero, KnownOne);
 877
 878   assert (Op.getValueType() == MVT::i32);
 879
 880   // ANY_EXTEND and EXTLOAD operations can only be done on types smaller than
 881   // i32.  These smaller types are legal to use with the i24 instructions.
 882   if ((KnownZero & APInt(KnownZero.getBitWidth(), 0xFF000000)) == 0xFF000000 ||
 883        Op.getOpcode() == ISD::ANY_EXTEND ||
 884        ISD::isEXTLoad(Op.getNode())) {
 885     U24 = SimplifyI24(Op);
 886     return true;
 887   }
 888   return false;
 889 }
 890
 891 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 892
 893   if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 894     return;
 895   }
 896
 897   // Go over all selected nodes and try to fold them a bit more
 898   const AMDGPUTargetLowering& Lowering =
 899     (*(const AMDGPUTargetLowering*)getTargetLowering());
 900   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 901        E = CurDAG->allnodes_end(); I != E; ++I) {
 902
 903     SDNode *Node = I;
 904
 905     MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
 906     if (!MachineNode)
 907       continue;
 908
 909     SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
 910     if (ResNode != Node) {
 911       ReplaceUses(Node, ResNode);
 912     }
 913   }
 914 }