lib/Target/R600/R600ISelLowering.cpp

   1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Custom DAG lowering for R600
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "R600ISelLowering.h"
  16 #include "R600Defines.h"
  17 #include "R600InstrInfo.h"
  18 #include "R600MachineFunctionInfo.h"
  19 #include "llvm/CodeGen/MachineInstrBuilder.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/CodeGen/SelectionDAG.h"
  22 #include "llvm/IR/Argument.h"
  23 #include "llvm/IR/Function.h"
  24
  25 using namespace llvm;
  26
  27 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
  28     AMDGPUTargetLowering(TM),
  29     TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  30   setOperationAction(ISD::MUL, MVT::i64, Expand);
  31   addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  32   addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  33   addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  34   addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  35   computeRegisterProperties();
  36
  37   setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  38   setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  39   setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  40   setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  41
  42   setOperationAction(ISD::ADD,  MVT::v4i32, Expand);
  43   setOperationAction(ISD::AND,  MVT::v4i32, Expand);
  44   setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  45   setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  46   setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  47   setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  48   setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  49   setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  50   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  51
  52   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  53   setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  54
  55   setOperationAction(ISD::FSUB, MVT::f32, Expand);
  56
  57   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  58   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  59   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  60   setOperationAction(ISD::FPOW, MVT::f32, Custom);
  61
  62   setOperationAction(ISD::ROTL, MVT::i32, Custom);
  63
  64   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  65   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  66
  67   setOperationAction(ISD::SETCC, MVT::i32, Custom);
  68   setOperationAction(ISD::SETCC, MVT::f32, Custom);
  69   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  70
  71   setOperationAction(ISD::SELECT, MVT::i32, Custom);
  72   setOperationAction(ISD::SELECT, MVT::f32, Custom);
  73
  74   setOperationAction(ISD::STORE, MVT::i32, Custom);
  75   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  76
  77   setOperationAction(ISD::LOAD, MVT::i32, Custom);
  78   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  79   setTargetDAGCombine(ISD::FP_ROUND);
  80   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  81
  82   setSchedulingPreference(Sched::VLIW);
  83 }
  84
  85 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
  86     MachineInstr * MI, MachineBasicBlock * BB) const {
  87   MachineFunction * MF = BB->getParent();
  88   MachineRegisterInfo &MRI = MF->getRegInfo();
  89   MachineBasicBlock::iterator I = *MI;
  90
  91   switch (MI->getOpcode()) {
  92   default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  93   case AMDGPU::SHADER_TYPE: break;
  94   case AMDGPU::CLAMP_R600: {
  95     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
  96                                                    AMDGPU::MOV,
  97                                                    MI->getOperand(0).getReg(),
  98                                                    MI->getOperand(1).getReg());
  99     TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
 100     break;
 101   }
 102
 103   case AMDGPU::FABS_R600: {
 104     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 105                                                     AMDGPU::MOV,
 106                                                     MI->getOperand(0).getReg(),
 107                                                     MI->getOperand(1).getReg());
 108     TII->addFlag(NewMI, 0, MO_FLAG_ABS);
 109     break;
 110   }
 111
 112   case AMDGPU::FNEG_R600: {
 113     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 114                                                     AMDGPU::MOV,
 115                                                     MI->getOperand(0).getReg(),
 116                                                     MI->getOperand(1).getReg());
 117     TII->addFlag(NewMI, 0, MO_FLAG_NEG);
 118     break;
 119   }
 120
 121   case AMDGPU::MASK_WRITE: {
 122     unsigned maskedRegister = MI->getOperand(0).getReg();
 123     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
 124     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
 125     TII->addFlag(defInstr, 0, MO_FLAG_MASK);
 126     break;
 127   }
 128
 129   case AMDGPU::MOV_IMM_F32:
 130     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 131                      MI->getOperand(1).getFPImm()->getValueAPF()
 132                          .bitcastToAPInt().getZExtValue());
 133     break;
 134   case AMDGPU::MOV_IMM_I32:
 135     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 136                      MI->getOperand(1).getImm());
 137     break;
 138
 139
 140   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
 141   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
 142     unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
 143
 144     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 145             .addOperand(MI->getOperand(0))
 146             .addOperand(MI->getOperand(1))
 147             .addImm(EOP); // Set End of program bit
 148     break;
 149   }
 150
 151   case AMDGPU::TXD: {
 152     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 153     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 154
 155     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 156             .addOperand(MI->getOperand(3))
 157             .addOperand(MI->getOperand(4))
 158             .addOperand(MI->getOperand(5))
 159             .addOperand(MI->getOperand(6));
 160     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 161             .addOperand(MI->getOperand(2))
 162             .addOperand(MI->getOperand(4))
 163             .addOperand(MI->getOperand(5))
 164             .addOperand(MI->getOperand(6));
 165     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
 166             .addOperand(MI->getOperand(0))
 167             .addOperand(MI->getOperand(1))
 168             .addOperand(MI->getOperand(4))
 169             .addOperand(MI->getOperand(5))
 170             .addOperand(MI->getOperand(6))
 171             .addReg(T0, RegState::Implicit)
 172             .addReg(T1, RegState::Implicit);
 173     break;
 174   }
 175
 176   case AMDGPU::TXD_SHADOW: {
 177     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 178     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 179
 180     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 181             .addOperand(MI->getOperand(3))
 182             .addOperand(MI->getOperand(4))
 183             .addOperand(MI->getOperand(5))
 184             .addOperand(MI->getOperand(6));
 185     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 186             .addOperand(MI->getOperand(2))
 187             .addOperand(MI->getOperand(4))
 188             .addOperand(MI->getOperand(5))
 189             .addOperand(MI->getOperand(6));
 190     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
 191             .addOperand(MI->getOperand(0))
 192             .addOperand(MI->getOperand(1))
 193             .addOperand(MI->getOperand(4))
 194             .addOperand(MI->getOperand(5))
 195             .addOperand(MI->getOperand(6))
 196             .addReg(T0, RegState::Implicit)
 197             .addReg(T1, RegState::Implicit);
 198     break;
 199   }
 200
 201   case AMDGPU::BRANCH:
 202       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 203               .addOperand(MI->getOperand(0))
 204               .addReg(0);
 205       break;
 206
 207   case AMDGPU::BRANCH_COND_f32: {
 208     MachineInstr *NewMI =
 209       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 210               AMDGPU::PREDICATE_BIT)
 211               .addOperand(MI->getOperand(1))
 212               .addImm(OPCODE_IS_NOT_ZERO)
 213               .addImm(0); // Flags
 214     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 215     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 216             .addOperand(MI->getOperand(0))
 217             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 218     break;
 219   }
 220
 221   case AMDGPU::BRANCH_COND_i32: {
 222     MachineInstr *NewMI =
 223       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 224             AMDGPU::PREDICATE_BIT)
 225             .addOperand(MI->getOperand(1))
 226             .addImm(OPCODE_IS_NOT_ZERO_INT)
 227             .addImm(0); // Flags
 228     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 229     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 230            .addOperand(MI->getOperand(0))
 231             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 232     break;
 233   }
 234
 235   case AMDGPU::EG_ExportSwz:
 236   case AMDGPU::R600_ExportSwz: {
 237     // Instruction is left unmodified if its not the last one of its type
 238     bool isLastInstructionOfItsType = true;
 239     unsigned InstExportType = MI->getOperand(1).getImm();
 240     for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
 241          EndBlock = BB->end(); NextExportInst != EndBlock;
 242          NextExportInst = llvm::next(NextExportInst)) {
 243       if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
 244           NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
 245         unsigned CurrentInstExportType = NextExportInst->getOperand(1)
 246             .getImm();
 247         if (CurrentInstExportType == InstExportType) {
 248           isLastInstructionOfItsType = false;
 249           break;
 250         }
 251       }
 252     }
 253     bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
 254     if (!EOP && !isLastInstructionOfItsType)
 255       return BB;
 256     unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
 257     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 258             .addOperand(MI->getOperand(0))
 259             .addOperand(MI->getOperand(1))
 260             .addOperand(MI->getOperand(2))
 261             .addOperand(MI->getOperand(3))
 262             .addOperand(MI->getOperand(4))
 263             .addOperand(MI->getOperand(5))
 264             .addOperand(MI->getOperand(6))
 265             .addImm(CfInst)
 266             .addImm(EOP);
 267     break;
 268   }
 269   }
 270
 271   MI->eraseFromParent();
 272   return BB;
 273 }
 274
 275 //===----------------------------------------------------------------------===//
 276 // Custom DAG Lowering Operations
 277 //===----------------------------------------------------------------------===//
 278
 279 using namespace llvm::Intrinsic;
 280 using namespace llvm::AMDGPUIntrinsic;
 281
 282 static SDValue
 283 InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
 284     unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
 285     SDValue Scalar, SDValue Chain) {
 286   if (!ExportMap[Slot]) {
 287     SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
 288       DL, MVT::v4f32,
 289       DAG.getUNDEF(MVT::v4f32),
 290       Scalar,
 291       DAG.getConstant(Channel, MVT::i32));
 292
 293     unsigned Mask = 1 << Channel;
 294
 295     const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
 296         DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
 297         DAG.getConstant(Mask, MVT::i32)};
 298
 299     SDValue Res =  DAG.getNode(
 300         AMDGPUISD::EXPORT,
 301         DL,
 302         MVT::Other,
 303         Ops, 6);
 304      ExportMap[Slot] = Res.getNode();
 305      return Res;
 306   }
 307
 308   SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
 309   SDValue PreviousVector = ExportInstruction->getOperand(1);
 310   SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
 311       DL, MVT::v4f32,
 312       PreviousVector,
 313       Scalar,
 314       DAG.getConstant(Channel, MVT::i32));
 315
 316   unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
 317       ->getZExtValue();
 318   Mask |= (1 << Channel);
 319
 320   const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
 321       DAG.getConstant(Inst, MVT::i32),
 322       DAG.getConstant(Type, MVT::i32),
 323       DAG.getConstant(Slot, MVT::i32),
 324       DAG.getConstant(Mask, MVT::i32)};
 325
 326   DAG.UpdateNodeOperands(ExportInstruction,
 327       Ops, 6);
 328
 329   return Chain;
 330
 331 }
 332
 333 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 334   switch (Op.getOpcode()) {
 335   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 336   case ISD::BR_CC: return LowerBR_CC(Op, DAG);
 337   case ISD::ROTL: return LowerROTL(Op, DAG);
 338   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
 339   case ISD::SELECT: return LowerSELECT(Op, DAG);
 340   case ISD::SETCC: return LowerSETCC(Op, DAG);
 341   case ISD::STORE: return LowerSTORE(Op, DAG);
 342   case ISD::LOAD: return LowerLOAD(Op, DAG);
 343   case ISD::FPOW: return LowerFPOW(Op, DAG);
 344   case ISD::INTRINSIC_VOID: {
 345     SDValue Chain = Op.getOperand(0);
 346     unsigned IntrinsicID =
 347                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 348     switch (IntrinsicID) {
 349     case AMDGPUIntrinsic::AMDGPU_store_output: {
 350       MachineFunction &MF = DAG.getMachineFunction();
 351       MachineRegisterInfo &MRI = MF.getRegInfo();
 352       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 353       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 354       if (!MRI.isLiveOut(Reg)) {
 355         MRI.addLiveOut(Reg);
 356       }
 357       return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
 358     }
 359     case AMDGPUIntrinsic::R600_store_pixel_color: {
 360       MachineFunction &MF = DAG.getMachineFunction();
 361       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 362       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 363
 364       SDNode **OutputsMap = MFI->Outputs;
 365       return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
 366           RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
 367           Chain);
 368
 369     }
 370
 371     // default for switch(IntrinsicID)
 372     default: break;
 373     }
 374     // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
 375     break;
 376   }
 377   case ISD::INTRINSIC_WO_CHAIN: {
 378     unsigned IntrinsicID =
 379                          cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 380     EVT VT = Op.getValueType();
 381     DebugLoc DL = Op.getDebugLoc();
 382     switch(IntrinsicID) {
 383     default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 384     case AMDGPUIntrinsic::R600_load_input: {
 385       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 386       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 387       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
 388     }
 389
 390     case AMDGPUIntrinsic::R600_interp_input: {
 391       int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 392       int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
 393       MachineSDNode *interp;
 394       if (ijb < 0) {
 395         interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
 396             MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
 397         return DAG.getTargetExtractSubreg(
 398             TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
 399             DL, MVT::f32, SDValue(interp, 0));
 400       }
 401
 402       if (slot % 4 < 2)
 403         interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
 404             MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
 405             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 406                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
 407             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 408                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
 409       else
 410         interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
 411             MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
 412             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 413                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
 414             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 415                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
 416
 417       return SDValue(interp, slot % 2);
 418     }
 419
 420     case r600_read_ngroups_x:
 421       return LowerImplicitParameter(DAG, VT, DL, 0);
 422     case r600_read_ngroups_y:
 423       return LowerImplicitParameter(DAG, VT, DL, 1);
 424     case r600_read_ngroups_z:
 425       return LowerImplicitParameter(DAG, VT, DL, 2);
 426     case r600_read_global_size_x:
 427       return LowerImplicitParameter(DAG, VT, DL, 3);
 428     case r600_read_global_size_y:
 429       return LowerImplicitParameter(DAG, VT, DL, 4);
 430     case r600_read_global_size_z:
 431       return LowerImplicitParameter(DAG, VT, DL, 5);
 432     case r600_read_local_size_x:
 433       return LowerImplicitParameter(DAG, VT, DL, 6);
 434     case r600_read_local_size_y:
 435       return LowerImplicitParameter(DAG, VT, DL, 7);
 436     case r600_read_local_size_z:
 437       return LowerImplicitParameter(DAG, VT, DL, 8);
 438
 439     case r600_read_tgid_x:
 440       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 441                                   AMDGPU::T1_X, VT);
 442     case r600_read_tgid_y:
 443       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 444                                   AMDGPU::T1_Y, VT);
 445     case r600_read_tgid_z:
 446       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 447                                   AMDGPU::T1_Z, VT);
 448     case r600_read_tidig_x:
 449       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 450                                   AMDGPU::T0_X, VT);
 451     case r600_read_tidig_y:
 452       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 453                                   AMDGPU::T0_Y, VT);
 454     case r600_read_tidig_z:
 455       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 456                                   AMDGPU::T0_Z, VT);
 457     }
 458     // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
 459     break;
 460   }
 461   } // end switch(Op.getOpcode())
 462   return SDValue();
 463 }
 464
 465 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
 466                                             SmallVectorImpl<SDValue> &Results,
 467                                             SelectionDAG &DAG) const {
 468   switch (N->getOpcode()) {
 469   default: return;
 470   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
 471     return;
 472   case ISD::LOAD: {
 473     SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
 474     Results.push_back(SDValue(Node, 0));
 475     Results.push_back(SDValue(Node, 1));
 476     // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
 477     // function
 478     DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
 479     return;
 480   }
 481   }
 482 }
 483
 484 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
 485   return DAG.getNode(
 486       ISD::SETCC,
 487       Op.getDebugLoc(),
 488       MVT::i1,
 489       Op, DAG.getConstantFP(0.0f, MVT::f32),
 490       DAG.getCondCode(ISD::SETNE)
 491       );
 492 }
 493
 494 SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 495   SDValue Chain = Op.getOperand(0);
 496   SDValue CC = Op.getOperand(1);
 497   SDValue LHS   = Op.getOperand(2);
 498   SDValue RHS   = Op.getOperand(3);
 499   SDValue JumpT  = Op.getOperand(4);
 500   SDValue CmpValue;
 501   SDValue Result;
 502
 503   if (LHS.getValueType() == MVT::i32) {
 504     CmpValue = DAG.getNode(
 505         ISD::SELECT_CC,
 506         Op.getDebugLoc(),
 507         MVT::i32,
 508         LHS, RHS,
 509         DAG.getConstant(-1, MVT::i32),
 510         DAG.getConstant(0, MVT::i32),
 511         CC);
 512   } else if (LHS.getValueType() == MVT::f32) {
 513     CmpValue = DAG.getNode(
 514         ISD::SELECT_CC,
 515         Op.getDebugLoc(),
 516         MVT::f32,
 517         LHS, RHS,
 518         DAG.getConstantFP(1.0f, MVT::f32),
 519         DAG.getConstantFP(0.0f, MVT::f32),
 520         CC);
 521   } else {
 522     assert(0 && "Not valid type for br_cc");
 523   }
 524   Result = DAG.getNode(
 525       AMDGPUISD::BRANCH_COND,
 526       CmpValue.getDebugLoc(),
 527       MVT::Other, Chain,
 528       JumpT, CmpValue);
 529   return Result;
 530 }
 531
 532 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
 533                                                    DebugLoc DL,
 534                                                    unsigned DwordOffset) const {
 535   unsigned ByteOffset = DwordOffset * 4;
 536   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 537                                       AMDGPUAS::PARAM_I_ADDRESS);
 538
 539   // We shouldn't be using an offset wider than 16-bits for implicit parameters.
 540   assert(isInt<16>(ByteOffset));
 541
 542   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
 543                      DAG.getConstant(ByteOffset, MVT::i32), // PTR
 544                      MachinePointerInfo(ConstantPointerNull::get(PtrType)),
 545                      false, false, false, 0);
 546 }
 547
 548 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
 549   DebugLoc DL = Op.getDebugLoc();
 550   EVT VT = Op.getValueType();
 551
 552   return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
 553                      Op.getOperand(0),
 554                      Op.getOperand(0),
 555                      DAG.getNode(ISD::SUB, DL, VT,
 556                                  DAG.getConstant(32, MVT::i32),
 557                                  Op.getOperand(1)));
 558 }
 559
 560 bool R600TargetLowering::isZero(SDValue Op) const {
 561   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
 562     return Cst->isNullValue();
 563   } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
 564     return CstFP->isZero();
 565   } else {
 566     return false;
 567   }
 568 }
 569
 570 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 571   DebugLoc DL = Op.getDebugLoc();
 572   EVT VT = Op.getValueType();
 573
 574   SDValue LHS = Op.getOperand(0);
 575   SDValue RHS = Op.getOperand(1);
 576   SDValue True = Op.getOperand(2);
 577   SDValue False = Op.getOperand(3);
 578   SDValue CC = Op.getOperand(4);
 579   SDValue Temp;
 580
 581   // LHS and RHS are guaranteed to be the same value type
 582   EVT CompareVT = LHS.getValueType();
 583
 584   // Check if we can lower this to a native operation.
 585
 586   // Try to lower to a CND* instruction:
 587   // CND* instructions requires RHS to be zero.  Some SELECT_CC nodes that
 588   // can be lowered to CND* instructions can also be lowered to SET*
 589   // instructions.  CND* instructions are cheaper, because they dont't
 590   // require additional instructions to convert their result to the correct
 591   // value type, so this check should be first.
 592   if (isZero(LHS) || isZero(RHS)) {
 593     SDValue Cond = (isZero(LHS) ? RHS : LHS);
 594     SDValue Zero = (isZero(LHS) ? LHS : RHS);
 595     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 596     if (CompareVT != VT) {
 597       // Bitcast True / False to the correct types.  This will end up being
 598       // a nop, but it allows us to define only a single pattern in the
 599       // .TD files for each CND* instruction rather than having to have
 600       // one pattern for integer True/False and one for fp True/False
 601       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
 602       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
 603     }
 604     if (isZero(LHS)) {
 605       CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
 606     }
 607
 608     switch (CCOpcode) {
 609     case ISD::SETONE:
 610     case ISD::SETUNE:
 611     case ISD::SETNE:
 612     case ISD::SETULE:
 613     case ISD::SETULT:
 614     case ISD::SETOLE:
 615     case ISD::SETOLT:
 616     case ISD::SETLE:
 617     case ISD::SETLT:
 618       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
 619       Temp = True;
 620       True = False;
 621       False = Temp;
 622       break;
 623     default:
 624       break;
 625     }
 626     SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 627         Cond, Zero,
 628         True, False,
 629         DAG.getCondCode(CCOpcode));
 630     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
 631   }
 632
 633   // Try to lower to a SET* instruction:
 634   // We need all the operands of SELECT_CC to have the same value type, so if
 635   // necessary we need to change True and False to be the same type as LHS and
 636   // RHS, and then convert the result of the select_cc back to the correct type.
 637
 638   // Move hardware True/False values to the correct operand.
 639   if (isHWTrueValue(False) && isHWFalseValue(True)) {
 640     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 641     std::swap(False, True);
 642     CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
 643   }
 644
 645   if (isHWTrueValue(True) && isHWFalseValue(False)) {
 646     if (CompareVT !=  VT) {
 647       if (VT == MVT::f32 && CompareVT == MVT::i32) {
 648         SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 649             LHS, RHS,
 650             DAG.getConstant(-1, MVT::i32),
 651             DAG.getConstant(0, MVT::i32),
 652             CC);
 653         // Convert integer values of true (-1) and false (0) to fp values of
 654         // true (1.0f) and false (0.0f).
 655         SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
 656                                                   DAG.getConstant(1, MVT::i32));
 657         return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
 658       } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
 659         SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 660             LHS, RHS,
 661             DAG.getConstantFP(1.0f, MVT::f32),
 662             DAG.getConstantFP(0.0f, MVT::f32),
 663             CC);
 664         // Convert fp values of true (1.0f) and false (0.0f) to integer values
 665         // of true (-1) and false (0).
 666         SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
 667         return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
 668       } else {
 669         // I don't think there will be any other type pairings.
 670         assert(!"Unhandled operand type parings in SELECT_CC");
 671       }
 672     } else {
 673       // This SELECT_CC is already legal.
 674       return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
 675     }
 676   }
 677
 678   // Possible Min/Max pattern
 679   SDValue MinMax = LowerMinMax(Op, DAG);
 680   if (MinMax.getNode()) {
 681     return MinMax;
 682   }
 683
 684   // If we make it this for it means we have no native instructions to handle
 685   // this SELECT_CC, so we must lower it.
 686   SDValue HWTrue, HWFalse;
 687
 688   if (CompareVT == MVT::f32) {
 689     HWTrue = DAG.getConstantFP(1.0f, CompareVT);
 690     HWFalse = DAG.getConstantFP(0.0f, CompareVT);
 691   } else if (CompareVT == MVT::i32) {
 692     HWTrue = DAG.getConstant(-1, CompareVT);
 693     HWFalse = DAG.getConstant(0, CompareVT);
 694   }
 695   else {
 696     assert(!"Unhandled value type in LowerSELECT_CC");
 697   }
 698
 699   // Lower this unsupported SELECT_CC into a combination of two supported
 700   // SELECT_CC operations.
 701   SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
 702
 703   return DAG.getNode(ISD::SELECT_CC, DL, VT,
 704       Cond, HWFalse,
 705       True, False,
 706       DAG.getCondCode(ISD::SETNE));
 707 }
 708
 709 SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
 710   return DAG.getNode(ISD::SELECT_CC,
 711       Op.getDebugLoc(),
 712       Op.getValueType(),
 713       Op.getOperand(0),
 714       DAG.getConstant(0, MVT::i32),
 715       Op.getOperand(1),
 716       Op.getOperand(2),
 717       DAG.getCondCode(ISD::SETNE));
 718 }
 719
 720 SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 721   SDValue Cond;
 722   SDValue LHS = Op.getOperand(0);
 723   SDValue RHS = Op.getOperand(1);
 724   SDValue CC  = Op.getOperand(2);
 725   DebugLoc DL = Op.getDebugLoc();
 726   assert(Op.getValueType() == MVT::i32);
 727   if (LHS.getValueType() == MVT::i32) {
 728     Cond = DAG.getNode(
 729         ISD::SELECT_CC,
 730         Op.getDebugLoc(),
 731         MVT::i32,
 732         LHS, RHS,
 733         DAG.getConstant(-1, MVT::i32),
 734         DAG.getConstant(0, MVT::i32),
 735         CC);
 736   } else if (LHS.getValueType() == MVT::f32) {
 737     Cond = DAG.getNode(
 738         ISD::SELECT_CC,
 739         Op.getDebugLoc(),
 740         MVT::f32,
 741         LHS, RHS,
 742         DAG.getConstantFP(1.0f, MVT::f32),
 743         DAG.getConstantFP(0.0f, MVT::f32),
 744         CC);
 745     Cond = DAG.getNode(
 746         ISD::FP_TO_SINT,
 747         DL,
 748         MVT::i32,
 749         Cond);
 750   } else {
 751     assert(0 && "Not valid type for set_cc");
 752   }
 753   Cond = DAG.getNode(
 754       ISD::AND,
 755       DL,
 756       MVT::i32,
 757       DAG.getConstant(1, MVT::i32),
 758       Cond);
 759   return Cond;
 760 }
 761
 762 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 763   DebugLoc DL = Op.getDebugLoc();
 764   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
 765   SDValue Chain = Op.getOperand(0);
 766   SDValue Value = Op.getOperand(1);
 767   SDValue Ptr = Op.getOperand(2);
 768
 769   if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
 770       Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
 771     // Convert pointer from byte address to dword address.
 772     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
 773                       DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
 774                                   Ptr, DAG.getConstant(2, MVT::i32)));
 775
 776     if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
 777       assert(!"Truncated and indexed stores not supported yet");
 778     } else {
 779       Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
 780     }
 781     return Chain;
 782   }
 783   return SDValue();
 784 }
 785
 786 // return (512 + (kc_bank << 12)
 787 static int
 788 ConstantAddressBlock(unsigned AddressSpace) {
 789   switch (AddressSpace) {
 790   case AMDGPUAS::CONSTANT_BUFFER_0:
 791     return 512;
 792   case AMDGPUAS::CONSTANT_BUFFER_1:
 793     return 512 + 4096;
 794   case AMDGPUAS::CONSTANT_BUFFER_2:
 795     return 512 + 4096 * 2;
 796   case AMDGPUAS::CONSTANT_BUFFER_3:
 797     return 512 + 4096 * 3;
 798   case AMDGPUAS::CONSTANT_BUFFER_4:
 799     return 512 + 4096 * 4;
 800   case AMDGPUAS::CONSTANT_BUFFER_5:
 801     return 512 + 4096 * 5;
 802   case AMDGPUAS::CONSTANT_BUFFER_6:
 803     return 512 + 4096 * 6;
 804   case AMDGPUAS::CONSTANT_BUFFER_7:
 805     return 512 + 4096 * 7;
 806   case AMDGPUAS::CONSTANT_BUFFER_8:
 807     return 512 + 4096 * 8;
 808   case AMDGPUAS::CONSTANT_BUFFER_9:
 809     return 512 + 4096 * 9;
 810   case AMDGPUAS::CONSTANT_BUFFER_10:
 811     return 512 + 4096 * 10;
 812   case AMDGPUAS::CONSTANT_BUFFER_11:
 813     return 512 + 4096 * 11;
 814   case AMDGPUAS::CONSTANT_BUFFER_12:
 815     return 512 + 4096 * 12;
 816   case AMDGPUAS::CONSTANT_BUFFER_13:
 817     return 512 + 4096 * 13;
 818   case AMDGPUAS::CONSTANT_BUFFER_14:
 819     return 512 + 4096 * 14;
 820   case AMDGPUAS::CONSTANT_BUFFER_15:
 821     return 512 + 4096 * 15;
 822   default:
 823     return -1;
 824   }
 825 }
 826
 827 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
 828 {
 829   EVT VT = Op.getValueType();
 830   DebugLoc DL = Op.getDebugLoc();
 831   LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
 832   SDValue Chain = Op.getOperand(0);
 833   SDValue Ptr = Op.getOperand(1);
 834   SDValue LoweredLoad;
 835
 836   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
 837   if (ConstantBlock > -1) {
 838     SDValue Result;
 839     if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
 840         dyn_cast<Constant>(LoadNode->getSrcValue())) {
 841       SDValue Slots[4];
 842       for (unsigned i = 0; i < 4; i++) {
 843         // We want Const position encoded with the following formula :
 844         // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
 845         // const_index is Ptr computed by llvm using an alignment of 16.
 846         // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
 847         // then div by 4 at the ISel step
 848         SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
 849             DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
 850         Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
 851       }
 852       Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
 853     } else {
 854       // non constant ptr cant be folded, keeps it as a v4f32 load
 855       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
 856           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
 857           );
 858     }
 859
 860     if (!VT.isVector()) {
 861       Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
 862           DAG.getConstant(0, MVT::i32));
 863     }
 864
 865     SDValue MergedValues[2] = {
 866         Result,
 867         Chain
 868     };
 869     return DAG.getMergeValues(MergedValues, 2, DL);
 870   }
 871
 872   return SDValue();
 873 }
 874
 875 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
 876     SelectionDAG &DAG) const {
 877   DebugLoc DL = Op.getDebugLoc();
 878   EVT VT = Op.getValueType();
 879   SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
 880   SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
 881   return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
 882 }
 883
 884 /// XXX Only kernel functions are supported, so we can assume for now that
 885 /// every function is a kernel function, but in the future we should use
 886 /// separate calling conventions for kernel and non-kernel functions.
 887 SDValue R600TargetLowering::LowerFormalArguments(
 888                                       SDValue Chain,
 889                                       CallingConv::ID CallConv,
 890                                       bool isVarArg,
 891                                       const SmallVectorImpl<ISD::InputArg> &Ins,
 892                                       DebugLoc DL, SelectionDAG &DAG,
 893                                       SmallVectorImpl<SDValue> &InVals) const {
 894   unsigned ParamOffsetBytes = 36;
 895   Function::const_arg_iterator FuncArg =
 896                             DAG.getMachineFunction().getFunction()->arg_begin();
 897   for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
 898     EVT VT = Ins[i].VT;
 899     Type *ArgType = FuncArg->getType();
 900     unsigned ArgSizeInBits = ArgType->isPointerTy() ?
 901                              32 : ArgType->getPrimitiveSizeInBits();
 902     unsigned ArgBytes = ArgSizeInBits >> 3;
 903     EVT ArgVT;
 904     if (ArgSizeInBits < VT.getSizeInBits()) {
 905       assert(!ArgType->isFloatTy() &&
 906              "Extending floating point arguments not supported yet");
 907       ArgVT = MVT::getIntegerVT(ArgSizeInBits);
 908     } else {
 909       ArgVT = VT;
 910     }
 911     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 912                                                     AMDGPUAS::PARAM_I_ADDRESS);
 913     SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
 914                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
 915                                        MachinePointerInfo(new Argument(PtrTy)),
 916                                        ArgVT, false, false, ArgBytes);
 917     InVals.push_back(Arg);
 918     ParamOffsetBytes += ArgBytes;
 919   }
 920   return Chain;
 921 }
 922
 923 EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
 924    if (!VT.isVector()) return MVT::i32;
 925    return VT.changeVectorElementTypeToInteger();
 926 }
 927
 928 //===----------------------------------------------------------------------===//
 929 // Custom DAG Optimizations
 930 //===----------------------------------------------------------------------===//
 931
 932 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
 933                                               DAGCombinerInfo &DCI) const {
 934   SelectionDAG &DAG = DCI.DAG;
 935
 936   switch (N->getOpcode()) {
 937   // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
 938   case ISD::FP_ROUND: {
 939       SDValue Arg = N->getOperand(0);
 940       if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
 941         return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
 942                            Arg.getOperand(0));
 943       }
 944       break;
 945     }
 946   // Extract_vec (Build_vector) generated by custom lowering
 947   // also needs to be customly combined
 948   case ISD::EXTRACT_VECTOR_ELT: {
 949     SDValue Arg = N->getOperand(0);
 950     if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
 951       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
 952         unsigned Element = Const->getZExtValue();
 953         return Arg->getOperand(Element);
 954       }
 955     }
 956     if (Arg.getOpcode() == ISD::BITCAST &&
 957         Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
 958       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
 959         unsigned Element = Const->getZExtValue();
 960         return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
 961             Arg->getOperand(0).getOperand(Element));
 962       }
 963     }
 964   }
 965   }
 966   return SDValue();
 967 }