//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a reference to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;
  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &Offset) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
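// Whether this constant can be encoded directly in the instruction word as an
// SI inline constant (small integers and a handful of common FP values)
// instead of needing a separate 32-bit literal.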
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
        SuperRC, SubRegIdx);
  }
  }
}
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}
bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
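      // Scan the users: if any of them needs an SGPR operand, build the whole
      // vector in scalar registers; otherwise default to VGPRs.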
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                    U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }
    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    //  2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    //  1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }
  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move to
    // the scalar version if the offsets are constant, so that we can try to
    // keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;
    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Transformation function: pack the offset and width of a BFE into
    // the format expected by S_BFE_I32 / S_BFE_U32. In the second
    // source, bits [5:0] contain the offset and bits [22:16] the width.
    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;
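    // For example, extracting 5 bits starting at bit 8 packs to
    // (8 | (5 << 16)) == 0x00050008.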
    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  }
  return SelectCode(N);
}
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}
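// Constant-address loads that the scalar unit can't service (pre-SI targets,
// or accesses narrower than 32 bits) are handled through the global path.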
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}
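// Lower a 64-bit add/sub as two 32-bit halves: the low-half op produces a
// carry through its glue result, which the high-half carry op consumes
// (S_ADD_I32/S_ADDC_U32, or the VALU forms inside non-uniform control flow).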
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
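  // V_DIV_SCALE's VOP3 operand list interleaves a modifier immediate before
  // each source and appends clamp/omod; all of them are zero here.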
  SDValue Ops[] = {
    Zero,             // src0_modifiers
    N->getOperand(0), // src0
    Zero,             // src1_modifiers
    N->getOperand(1), // src1
    Zero,             // src2_modifiers
    N->getOperand(2), // src2
    Zero,             // clamp
    Zero              // omod
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}
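// Match the operands of a MUBUF access from the shape of the address
// expression: (add (add N2, N3), C1) becomes an addr64 access with an
// immediate offset, (add N0, C1) keeps the base pointer in the resource
// descriptor, and anything else falls through to a plain pointer with a
// zero offset.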
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  GLC = CurDAG->getTargetConstant(0, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {
      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1) -> addr64
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
        Ptr = N2;
        VAddr = N3;
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return;
      }

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
      Ptr = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, MVT::i16);
    return;
  }

  // Default case -> offset
  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i16);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr,
                                           SDValue &Offset) const {
  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);
    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
    return true;
  }
  return false;
}
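// Assemble a v4i32 buffer resource descriptor from a 64-bit pointer and
// explicit values for the remaining fields: RsrcDword1 is ORed into the
// pointer's high half (stride/swizzle bits), and RsrcDword2And3 supplies the
// size and format dwords.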
static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {

  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
  if (RsrcDword1) {
    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
                                    DAG->getConstant(RsrcDword1, MVT::i32)), 0);
  }

  SDValue DataLo = DAG->getTargetConstant(
      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);

  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
                                     MVT::v4i32, Ops), 0);
}
/// \brief Return a resource descriptor with the 'Add TID' bit enabled.
///        The TID (Thread ID) is multiplied by the stride value (bits [61:48]
///        of the resource descriptor) to create an offset, which is added to
///        the resource pointer.
static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {

  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
                  0xffffffff; // Size

  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
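// Scratch (private) accesses go through the preloaded scratch registers: the
// resource descriptor is built from the scratch pointer, and the per-wave
// scratch offset is copied into SOffset.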
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned ScratchPtrReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);

  Rsrc = buildScratchRSRC(CurDAG, DL,
                          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                              MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64));
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
      MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = Addr.getOperand(0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }

  // (add FI, n0)
  if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
      isa<FrameIndexSDNode>(Addr.getOperand(0))) {
    VAddr = Addr.getOperand(1);
    ImmOffset = Addr.getOperand(0);
    return true;
  }

  // (FI)
  if (isa<FrameIndexSDNode>(Addr)) {
    VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
                                           CurDAG->getConstant(0, MVT::i32)), 0);
    ImmOffset = Addr;
    return true;
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);
    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
    return true;
  }
  return false;
}
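// Fold fneg/fabs wrappers on a VOP3 source into the instruction's source
// modifier bits instead of leaving them as separate nodes; fneg(fabs(x))
// sets both modifiers on the same source.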
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);

  return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  // FIXME: Handle Clamp and Omod.
  Clamp = CurDAG->getTargetConstant(0, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}
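// Iterate target-specific post-selection folds
// (AMDGPUTargetLowering::PostISelFolding) over the selected machine nodes
// until the DAG reaches a fixed point.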
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}