//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a reference to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;
  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &Offset) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
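// Whether this constant can be encoded directly in the instruction word as an
// SI inline constant (small integers and a handful of common FP values)
// instead of needing a separate 32-bit literal.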
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
        SuperRC, SubRegIdx);
  }
  }
}
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}
bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
        TM.getSubtargetImpl()->getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
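      // Scan the users: if any of them needs an SGPR operand, build the whole
      // vector in scalar registers; otherwise default to VGPRs.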
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                    U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }
    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    //  2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    //  1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }
  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move to
    // the scalar version if the offsets are constant, so that we can try to
    // keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;
    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Transformation function: pack the offset and width of a BFE into
    // the format expected by S_BFE_I32 / S_BFE_U32. In the second
    // source, bits [5:0] contain the offset and bits [22:16] the width.
    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;
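    // For example, extracting 5 bits starting at bit 8 packs to
    // (8 | (5 << 16)) == 0x00050008.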
    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  }
  return SelectCode(N);
}
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}
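// Constant-address loads that the scalar unit can't service (pre-SI targets,
// or accesses narrower than 32 bits) are handled through the global path.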
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}
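// Lower a 64-bit add/sub as two 32-bit halves: the low-half op produces a
// carry through its glue result, which the high-half carry op consumes
// (S_ADD_I32/S_ADDC_U32, or the VALU forms inside non-uniform control flow).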
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
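  // V_DIV_SCALE's VOP3 operand list interleaves a modifier immediate before
  // each source and appends clamp/omod; all of them are zero here.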
  SDValue Ops[] = {
    Zero,             // src0_modifiers
    N->getOperand(0), // src0
    Zero,             // src1_modifiers
    N->getOperand(1), // src1
    Zero,             // src2_modifiers
    N->getOperand(2), // src2
    Zero,             // clamp
    Zero              // omod
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}
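// Match the operands of a MUBUF access from the shape of the address
// expression: (add (add N2, N3), C1) becomes an addr64 access with an
// immediate offset, (add N0, C1) keeps the base pointer in the resource
// descriptor, and anything else falls through to a plain pointer with a
// zero offset.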
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  GLC = CurDAG->getTargetConstant(0, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {
      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1) -> addr64
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
        Ptr = N2;
        VAddr = N3;
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return;
      }

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
      Ptr = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, MVT::i16);
    return;
  }

  // Default case -> offset
  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i16);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr,
                                           SDValue &Offset) const {
  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);
    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
    return true;
  }
  return false;
}
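// Assemble a v4i32 buffer resource descriptor from a 64-bit pointer and
// explicit values for the remaining fields: RsrcDword1 is ORed into the
// pointer's high half (stride/swizzle bits), and RsrcDword2And3 supplies the
// size and format dwords.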
static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {

  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
  if (RsrcDword1) {
    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
                                    DAG->getConstant(RsrcDword1, MVT::i32)), 0);
  }

  SDValue DataLo = DAG->getTargetConstant(
      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);

  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
                                     MVT::v4i32, Ops), 0);
}
/// \brief Return a resource descriptor with the 'Add TID' bit enabled.
///        The TID (Thread ID) is multiplied by the stride value (bits [61:48]
///        of the resource descriptor) to create an offset, which is added to
///        the resource pointer.
static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {

  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
                  0xffffffff; // Size

  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
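// Scratch (private) accesses go through the preloaded scratch registers: the
// resource descriptor is built from the scratch pointer, and the per-wave
// scratch offset is copied into SOffset.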
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned ScratchPtrReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);

  Rsrc = buildScratchRSRC(CurDAG, DL,
                          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                              MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64));
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
      MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = Addr.getOperand(0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }

  // (add FI, n0)
  if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
      isa<FrameIndexSDNode>(Addr.getOperand(0))) {
    VAddr = Addr.getOperand(1);
    ImmOffset = Addr.getOperand(0);
    return true;
  }

  // (FI)
  if (isa<FrameIndexSDNode>(Addr)) {
    VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
                                           CurDAG->getConstant(0, MVT::i32)), 0);
    ImmOffset = Addr;
    return true;
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);
    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
    return true;
  }
  return false;
}
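// Fold fneg/fabs wrappers on a VOP3 source into the instruction's source
// modifier bits instead of leaving them as separate nodes; fneg(fabs(x))
// sets both modifiers on the same source.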
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);

  return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  // FIXME: Handle Clamp and Omod.
  Clamp = CurDAG->getTargetConstant(0, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}
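// Iterate target-specific post-selection folds
// (AMDGPUTargetLowering::PostISelFolding) over the selected machine nodes
// until the DAG reaches a fixed point.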
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}