R600/SI: Emit config values in register value pairs.
[oota-llvm.git] / lib / Target / R600 / R600ControlFlowFinalizer.cpp
index 01dd22718ca617b18c76a6d6d6a37cec4651a34d..bc1ca58b863e0c8e93f6b85abed3e04ae9e09d5a 100644 (file)
@@ -30,9 +30,22 @@ namespace llvm {
 class R600ControlFlowFinalizer : public MachineFunctionPass {
 
 private:
+  enum ControlFlowInstruction {
+    CF_TC,
+    CF_CALL_FS,
+    CF_WHILE_LOOP,
+    CF_END_LOOP,
+    CF_LOOP_BREAK,
+    CF_LOOP_CONTINUE,
+    CF_JUMP,
+    CF_ELSE,
+    CF_POP
+  };
+
   static char ID;
   const R600InstrInfo *TII;
   unsigned MaxFetchInst;
+  const AMDGPUSubtarget &ST;
 
   bool isFetch(const MachineInstr *MI) const {
     switch (MI->getOpcode()) {
@@ -54,6 +67,13 @@ private:
     case AMDGPU::TEX_SAMPLE_C_G:
     case AMDGPU::TXD:
     case AMDGPU::TXD_SHADOW:
+    case AMDGPU::VTX_READ_GLOBAL_8_eg:
+    case AMDGPU::VTX_READ_GLOBAL_32_eg:
+    case AMDGPU::VTX_READ_GLOBAL_128_eg:
+    case AMDGPU::VTX_READ_PARAM_8_eg:
+    case AMDGPU::VTX_READ_PARAM_16_eg:
+    case AMDGPU::VTX_READ_PARAM_32_eg:
+    case AMDGPU::VTX_READ_PARAM_128_eg:
      return true;
     default:
       return false;
@@ -70,6 +90,52 @@ private:
     }
   }
 
+  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_R600);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_R600);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_R600);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_R600);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_R600);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_R600);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_R600);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_R600);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_R600);
+      }
+    } else {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_EG);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_EG);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_EG);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_EG);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_EG);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_EG);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_EG);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_EG);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_EG);
+      }
+    }
+  }
+
   MachineBasicBlock::iterator
   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       unsigned CfAddress) const {
@@ -85,7 +151,7 @@ private:
         break;
     }
     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
-        TII->get(AMDGPU::CF_TC))
+        getHWInstrDesc(CF_TC))
         .addImm(CfAddress) // ADDR
         .addImm(AluInstCount); // COUNT
     return I;
@@ -104,7 +170,8 @@ private:
 
 public:
   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
-    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+    ST(tm.getSubtarget<AMDGPUSubtarget>()) {
       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
       if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
         MaxFetchInst = 8;
@@ -124,7 +191,7 @@ public:
       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
       if (MFI->ShaderType == 1) {
         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-            TII->get(AMDGPU::CF_CALL_FS));
+            getHWInstrDesc(CF_CALL_FS));
         CfCount++;
       }
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
@@ -143,6 +210,12 @@ public:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
         case AMDGPU::CF_ALU:
+        case AMDGPU::EG_ExportBuf:
+        case AMDGPU::EG_ExportSwz:
+        case AMDGPU::R600_ExportBuf:
+        case AMDGPU::R600_ExportSwz:
+        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
           CfCount++;
           break;
@@ -150,8 +223,8 @@ public:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::WHILE_LOOP))
-              .addImm(2);
+              getHWInstrDesc(CF_WHILE_LOOP))
+              .addImm(1);
           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
               std::set<MachineInstr *>());
           Pair.second.insert(MIb);
@@ -166,7 +239,7 @@ public:
               LoopStack.back();
           LoopStack.pop_back();
           CounterPropagateAddr(Pair.second, CfCount);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
               .addImm(Pair.first + 1);
           MI->eraseFromParent();
           CfCount++;
@@ -174,7 +247,7 @@ public:
         }
         case AMDGPU::IF_PREDICATE_SET: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_JUMP))
+              getHWInstrDesc(CF_JUMP))
               .addImm(0)
               .addImm(0);
           IfThenElseStack.push_back(MIb);
@@ -188,7 +261,7 @@ public:
           IfThenElseStack.pop_back();
           CounterPropagateAddr(JumpInst, CfCount);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_ELSE))
+              getHWInstrDesc(CF_ELSE))
               .addImm(0)
               .addImm(1);
           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -201,11 +274,12 @@ public:
           CurrentStack--;
           MachineInstr *IfOrElseInst = IfThenElseStack.back();
           IfThenElseStack.pop_back();
-          CounterPropagateAddr(IfOrElseInst, CfCount);
+          CounterPropagateAddr(IfOrElseInst, CfCount + 1);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::POP))
+              getHWInstrDesc(CF_POP))
               .addImm(CfCount + 1)
               .addImm(1);
+          (void)MIb;
           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
           MI->eraseFromParent();
           CfCount++;
@@ -214,13 +288,13 @@ public:
         case AMDGPU::PREDICATED_BREAK: {
           CurrentStack--;
           CfCount += 3;
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
               .addImm(CfCount)
               .addImm(1);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::LOOP_BREAK))
+              getHWInstrDesc(CF_LOOP_BREAK))
               .addImm(0);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
               .addImm(CfCount)
               .addImm(1);
           LoopStack.back().second.insert(MIb);
@@ -229,7 +303,7 @@ public:
         }
         case AMDGPU::CONTINUE: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_CONTINUE))
+              getHWInstrDesc(CF_LOOP_CONTINUE))
               .addImm(0);
           LoopStack.back().second.insert(MIb);
           MI->eraseFromParent();
@@ -261,4 +335,3 @@ char R600ControlFlowFinalizer::ID = 0;
 llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
   return new R600ControlFlowFinalizer(TM);
 }
-