1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Custom DAG lowering for R600
13 //===----------------------------------------------------------------------===//
15 #include "R600ISelLowering.h"
16 #include "R600Defines.h"
17 #include "R600InstrInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/Argument.h"
20 #include "llvm/Function.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
27 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
28 AMDGPUTargetLowering(TM),
29 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
30 setOperationAction(ISD::MUL, MVT::i64, Expand);
31 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
35 computeRegisterProperties();
37 setOperationAction(ISD::FADD, MVT::v4f32, Expand);
38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
42 setOperationAction(ISD::ADD, MVT::v4i32, Expand);
43 setOperationAction(ISD::AND, MVT::v4i32, Expand);
44 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
45 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
46 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
47 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
48 setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
49 setOperationAction(ISD::UREM, MVT::v4i32, Expand);
50 setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
52 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
53 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
55 setOperationAction(ISD::FSUB, MVT::f32, Expand);
57 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
59 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
60 setOperationAction(ISD::FPOW, MVT::f32, Custom);
62 setOperationAction(ISD::ROTL, MVT::i32, Custom);
64 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
65 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
67 setOperationAction(ISD::SETCC, MVT::i32, Custom);
68 setOperationAction(ISD::SETCC, MVT::f32, Custom);
69 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
71 setOperationAction(ISD::SELECT, MVT::i32, Custom);
72 setOperationAction(ISD::SELECT, MVT::f32, Custom);
74 setOperationAction(ISD::STORE, MVT::i32, Custom);
75 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
77 setTargetDAGCombine(ISD::FP_ROUND);
79 setSchedulingPreference(Sched::VLIW);
// Expand R600 pseudo instructions into real machine instructions after
// instruction selection: modifier pseudos (CLAMP/FABS/FNEG become copies
// tagged with an instruction flag), immediate materialization, constant
// register loads, register reservation, texture sampling with derivatives
// (TXD variants), conditional branches and exports.
//
// NOTE(review): several case bodies below appear truncated relative to a
// well-formed switch (missing `break` statements, closing braces, and at
// least one case label) — confirm against the canonical upstream file
// before relying on control flow here.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Replacement instructions are inserted at the position of the pseudo.
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  // Anything not handled here uses the generic AMDGPU expansion.
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
  case AMDGPU::CLAMP_R600: {
    // Re-emit as a default (copy-like) instruction carrying the CLAMP flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
  case AMDGPU::FABS_R600: {
    // Re-emit carrying the ABS source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
  case AMDGPU::FNEG_R600: {
    // Re-emit carrying the NEG source modifier flag.
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
  case AMDGPU::R600_LOAD_CONST: {
    // Copy out of the constant register file; the index is an immediate.
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
            .addOperand(MI->getOperand(0))
            .addReg(ConstantReg);
  case AMDGPU::MASK_WRITE: {
    // Tag the defining instruction of the masked register so its result
    // write is suppressed.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
  case AMDGPU::MOV_IMM_F32:
    // Materialize an f32 immediate via its raw integer bit pattern.
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    // Fold the end-of-program bit into the RAT write when the next
    // instruction is a return.
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
  case AMDGPU::RESERVE_REG: {
    // Reserve the 32-bit register (and, apparently, its 128-bit
    // super-register) so the allocator will not reuse them.
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
                         AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    // NOTE(review): the `unsigned SuperReg =` declaration for the next
    // statement looks to be missing — `SuperReg` is otherwise undefined.
    AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
    MFI->ReservedRegs.push_back(SuperReg);
  // NOTE(review): a `case AMDGPU::TXD:` label appears to be missing here —
  // the block below emits gradient setup (H/V) then TEX_SAMPLE_G, which
  // matches texture-sample-with-derivatives lowering.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    // The sample reads the gradient registers implicitly.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
  case AMDGPU::TXD_SHADOW: {
    // Same as the TXD expansion above, but with the shadow-compare sampler.
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
  // NOTE(review): an unconditional-branch case label (presumably
  // `case AMDGPU::BRANCH:`) appears to be missing before this JUMP.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
  case AMDGPU::BRANCH_COND_f32: {
    // Emit a PRED_X comparing the f32 condition against zero, then a JUMP
    // predicated on (and killing) the predicate bit.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
  case AMDGPU::BRANCH_COND_i32: {
    // Integer variant of the conditional branch above.
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
  case AMDGPU::input_perspective: {
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

    // XXX Be more fine about register reservation
    for (unsigned i = 0; i < 4; i ++) {
      unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
      MFI->ReservedRegs.push_back(ReservedReg);
    // Operand 1 selects the interpolation mode (ij index).
    switch (MI->getOperand(1).getImm()) {
    case 0:// Perspective
      MFI->HasPerspectiveInterpolation = true;
    // NOTE(review): the `case 1:// Linear` label and `default:` appear to
    // be missing around the next two statements.
    MFI->HasLinearInterpolation = true;
      assert(0 && "Unknow ij index");
  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // End-of-program bit is set when the export is immediately followed by
    // a return.
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    // Control-flow instruction id differs between Evergreen (84) and
    // R600 (40) export encodings.
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
  // The pseudo itself is always removed after expansion.
  MI->eraseFromParent();
300 //===----------------------------------------------------------------------===//
301 // Custom DAG Lowering Operations
302 //===----------------------------------------------------------------------===//
304 using namespace llvm::Intrinsic;
305 using namespace llvm::AMDGPUIntrinsic;
// Accumulate one scalar channel into the per-slot export node cached in
// ExportMap.  The first scalar for a slot creates a fresh export node from
// an UNDEF v4f32 vector; subsequent scalars insert their channel into the
// existing export's vector and widen its write mask in place via
// UpdateNodeOperands.
//
// NOTE(review): this function appears truncated — its return-type line and
// several DAG.getNode(...) argument lists (node opcodes, DL, result types,
// element operands) are missing; confirm against the upstream file.
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
    unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
    SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    // First write to this slot: build the vector from scratch.
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
        DAG.getUNDEF(MVT::v4f32),
        DAG.getConstant(Channel, MVT::i32));
    // Write mask covers only this channel so far.
    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(
    // Remember the export so later channels for this slot can merge in.
    ExportMap[Slot] = Res.getNode();

  // Slot already has an export: splice this channel into its vector.
  SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DAG.getConstant(Channel, MVT::i32));

  // Extend the existing write mask (operand 5) with this channel.
  unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  // Mutate the cached export node in place rather than creating a new one.
  DAG.UpdateNodeOperands(ExportInstruction,
// Central dispatch for all operations marked Custom in the constructor.
// Simple opcodes forward to their dedicated Lower* helper; the intrinsic
// cases are handled inline: INTRINSIC_VOID covers the store-style
// intrinsics (output/pixel/stream exports) and INTRINSIC_WO_CHAIN covers
// the load-style ones (shader inputs, implicit kernel parameters, and
// workgroup/thread id registers).
//
// NOTE(review): several statements below appear truncated (missing closing
// braces, DAG.getNode argument lists, and the `Inst` selection for stream
// outputs) — confirm against the upstream file.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the fixed output register for this index.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      // Output registers must be live-out of the function.
      if (!MRI.isLiveOut(Reg)) {
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      // Merge this scalar channel into the per-slot color export node.
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
    case AMDGPUIntrinsic::R600_store_stream_output : {
      // Same merging scheme, but into the per-buffer stream-output maps.
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

      SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
      // NOTE(review): the cases selecting the export instruction id per
      // buffer appear to be missing from this switch.
      switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) {
        llvm_unreachable("Wrong buffer id for stream outputs !");

      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
    // default for switch(IntrinsicID)
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs live in fixed T registers; expose them as live-ins.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      // Interpolate a perspective-corrected input: build the full 4-wide
      // interpolated vector for the slot, then extract this channel.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    case AMDGPUIntrinsic::R600_load_input_linear: {
      // Linear interpolation variant (mode selector 1).
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    case AMDGPUIntrinsic::R600_load_input_constant: {
      // Constant (flat) interpolation uses only the P0 interpolator.
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));

    // Implicit kernel parameters are loaded from the parameter buffer at
    // fixed dword offsets (0-8 below).
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Workgroup / thread ids are preloaded into fixed T registers.
    // NOTE(review): the specific register operand of each call below is
    // missing (truncated argument lists).
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
  } // end switch(Op.getOpcode())
524 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
525 SmallVectorImpl<SDValue> &Results,
526 SelectionDAG &DAG) const {
527 switch (N->getOpcode()) {
529 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
533 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
538 Op, DAG.getConstantFP(0.0f, MVT::f32),
539 DAG.getCondCode(ISD::SETNE)
543 SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
544 SDValue Chain = Op.getOperand(0);
545 SDValue CC = Op.getOperand(1);
546 SDValue LHS = Op.getOperand(2);
547 SDValue RHS = Op.getOperand(3);
548 SDValue JumpT = Op.getOperand(4);
552 if (LHS.getValueType() == MVT::i32) {
553 CmpValue = DAG.getNode(
558 DAG.getConstant(-1, MVT::i32),
559 DAG.getConstant(0, MVT::i32),
561 } else if (LHS.getValueType() == MVT::f32) {
562 CmpValue = DAG.getNode(
567 DAG.getConstantFP(1.0f, MVT::f32),
568 DAG.getConstantFP(0.0f, MVT::f32),
571 assert(0 && "Not valid type for br_cc");
573 Result = DAG.getNode(
574 AMDGPUISD::BRANCH_COND,
575 CmpValue.getDebugLoc(),
581 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
583 unsigned DwordOffset) const {
584 unsigned ByteOffset = DwordOffset * 4;
585 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
586 AMDGPUAS::PARAM_I_ADDRESS);
588 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
589 assert(isInt<16>(ByteOffset));
591 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
592 DAG.getConstant(ByteOffset, MVT::i32), // PTR
593 MachinePointerInfo(ConstantPointerNull::get(PtrType)),
594 false, false, false, 0);
597 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
598 DebugLoc DL = Op.getDebugLoc();
599 EVT VT = Op.getValueType();
601 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
604 DAG.getNode(ISD::SUB, DL, VT,
605 DAG.getConstant(32, MVT::i32),
609 bool R600TargetLowering::isZero(SDValue Op) const {
610 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
611 return Cst->isNullValue();
612 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
613 return CstFP->isZero();
// Lower SELECT_CC for i32/f32.  Strategy, in order of preference:
//  1. CND* form when one compare operand is zero (cheapest),
//  2. SET* form when True/False are already the hardware boolean values,
//  3. min/max pattern matching,
//  4. otherwise split into two chained SELECT_CC nodes.
//
// NOTE(review): several DAG.getNode argument lists and closing braces
// below appear truncated — confirm against the upstream file.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions. CND* instructions are cheaper, because they dont't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    // When the zero was on the LHS, the condition must be swapped to keep
    // the compare semantics with the zero operand on the right.
    CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
            DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type parings in SELECT_CC");
    // This SELECT_CC is already legal.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
    assert(!"Unhandled value type in LowerSELECT_CC");

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      DAG.getCondCode(ISD::SETNE));
758 SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
759 return DAG.getNode(ISD::SELECT_CC,
763 DAG.getConstant(0, MVT::i32),
766 DAG.getCondCode(ISD::SETNE));
// Lower SETCC (result type i32) by materializing the compare with a
// SELECT_CC using the hardware boolean encodings (-1/0 for i32 compares,
// 1.0f/0.0f for f32 compares), then normalizing the result to 0/1.
//
// NOTE(review): both SELECT_CC builders and the f32->i32 conversion of the
// result appear truncated below — confirm against the upstream file.
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
      // Integer compare: true is all-ones.
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
  } else if (LHS.getValueType() == MVT::f32) {
      // Float compare: true is 1.0f.
      DAG.getConstantFP(1.0f, MVT::f32),
      DAG.getConstantFP(0.0f, MVT::f32),
  assert(0 && "Not valid type for set_cc");
      // Mask the hardware boolean down to the 0/1 expected of SETCC.
      DAG.getConstant(1, MVT::i32),
811 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
812 DebugLoc DL = Op.getDebugLoc();
813 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
814 SDValue Chain = Op.getOperand(0);
815 SDValue Value = Op.getOperand(1);
816 SDValue Ptr = Op.getOperand(2);
818 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
819 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
820 // Convert pointer from byte address to dword address.
821 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
822 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
823 Ptr, DAG.getConstant(2, MVT::i32)));
825 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
826 assert(!"Truncated and indexed stores not supported yet");
828 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
836 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
837 SelectionDAG &DAG) const {
838 DebugLoc DL = Op.getDebugLoc();
839 EVT VT = Op.getValueType();
840 SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
841 SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
842 return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
845 /// XXX Only kernel functions are supported, so we can assume for now that
846 /// every function is a kernel function, but in the future we should use
847 /// separate calling conventions for kernel and non-kernel functions.
// Lower incoming kernel arguments: each argument is read from the
// parameter buffer (PARAM_I address space) as a zero-extended load at a
// running byte offset.  The offset starts at 36 — presumably past the
// implicit-parameter block lowered by LowerImplicitParameter (TODO
// confirm against the ABI layout).
//
// NOTE(review): this function appears truncated — the `Chain`/`isVarArg`
// signature lines, the declarations of `VT`/`ArgVT` inside the loop, and
// the final `return Chain;` plus closing brace are missing; confirm
// against the upstream file.
SDValue R600TargetLowering::LowerFormalArguments(
                                      CallingConv::ID CallConv,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
                            DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    Type *ArgType = FuncArg->getType();
    // Pointers are 32-bit on this target.
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      // Narrow integer arguments are loaded via a zero-extending load of
      // their actual width.
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    // Next argument starts right after this one in the parameter buffer.
    ParamOffsetBytes += ArgBytes;
884 EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
885 if (!VT.isVector()) return MVT::i32;
886 return VT.changeVectorElementTypeToInteger();
889 //===----------------------------------------------------------------------===//
890 // Custom DAG Optimizations
891 //===----------------------------------------------------------------------===//
893 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
894 DAGCombinerInfo &DCI) const {
895 SelectionDAG &DAG = DCI.DAG;
897 switch (N->getOpcode()) {
898 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
899 case ISD::FP_ROUND: {
900 SDValue Arg = N->getOperand(0);
901 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
902 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),