R600/SI: Emit config values in register value pairs.
[oota-llvm.git] / lib / Target / R600 / R600ControlFlowFinalizer.cpp
index bd87d741ecf08def39f8a05e0b7e7a6d3859ef0f..bc1ca58b863e0c8e93f6b85abed3e04ae9e09d5a 100644 (file)
 /// computing their address on the fly ; it also sets STACK_SIZE info.
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "r600cf"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
 #include "AMDGPU.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
@@ -26,9 +30,22 @@ namespace llvm {
 class R600ControlFlowFinalizer : public MachineFunctionPass {
 
 private:
+  enum ControlFlowInstruction {
+    CF_TC,
+    CF_CALL_FS,
+    CF_WHILE_LOOP,
+    CF_END_LOOP,
+    CF_LOOP_BREAK,
+    CF_LOOP_CONTINUE,
+    CF_JUMP,
+    CF_ELSE,
+    CF_POP
+  };
+
   static char ID;
   const R600InstrInfo *TII;
   unsigned MaxFetchInst;
+  const AMDGPUSubtarget &ST;
 
   bool isFetch(const MachineInstr *MI) const {
     switch (MI->getOpcode()) {
@@ -50,6 +67,13 @@ private:
     case AMDGPU::TEX_SAMPLE_C_G:
     case AMDGPU::TXD:
     case AMDGPU::TXD_SHADOW:
+    case AMDGPU::VTX_READ_GLOBAL_8_eg:
+    case AMDGPU::VTX_READ_GLOBAL_32_eg:
+    case AMDGPU::VTX_READ_GLOBAL_128_eg:
+    case AMDGPU::VTX_READ_PARAM_8_eg:
+    case AMDGPU::VTX_READ_PARAM_16_eg:
+    case AMDGPU::VTX_READ_PARAM_32_eg:
+    case AMDGPU::VTX_READ_PARAM_128_eg:
      return true;
     default:
       return false;
@@ -66,6 +90,52 @@ private:
     }
   }
 
+  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_R600);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_R600);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_R600);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_R600);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_R600);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_R600);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_R600);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_R600);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_R600);
+      }
+    } else {
+      switch (CFI) {
+      case CF_TC:
+        return TII->get(AMDGPU::CF_TC_EG);
+      case CF_CALL_FS:
+        return TII->get(AMDGPU::CF_CALL_FS_EG);
+      case CF_WHILE_LOOP:
+        return TII->get(AMDGPU::WHILE_LOOP_EG);
+      case CF_END_LOOP:
+        return TII->get(AMDGPU::END_LOOP_EG);
+      case CF_LOOP_BREAK:
+        return TII->get(AMDGPU::LOOP_BREAK_EG);
+      case CF_LOOP_CONTINUE:
+        return TII->get(AMDGPU::CF_CONTINUE_EG);
+      case CF_JUMP:
+        return TII->get(AMDGPU::CF_JUMP_EG);
+      case CF_ELSE:
+        return TII->get(AMDGPU::CF_ELSE_EG);
+      case CF_POP:
+        return TII->get(AMDGPU::POP_EG);
+      }
+    }
+  }
+
   MachineBasicBlock::iterator
   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       unsigned CfAddress) const {
@@ -81,20 +151,13 @@ private:
         break;
     }
     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
-        TII->get(AMDGPU::CF_TC))
+        getHWInstrDesc(CF_TC))
         .addImm(CfAddress) // ADDR
         .addImm(AluInstCount); // COUNT
     return I;
   }
   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
-    switch (MI->getOpcode()) {
-    case AMDGPU::WHILE_LOOP:
-      MI->getOperand(0).setImm(Addr + 1);
-      break;
-    default:
-      MI->getOperand(0).setImm(Addr);
-      break;
-    }
+    MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
   }
   void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
       const {
@@ -107,7 +170,8 @@ private:
 
 public:
   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
-    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+    ST(tm.getSubtarget<AMDGPUSubtarget>()) {
       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
       if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
         MaxFetchInst = 8;
@@ -123,16 +187,17 @@ public:
       MachineBasicBlock &MBB = *MB;
       unsigned CfCount = 0;
       std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
-      std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack;
+      std::vector<MachineInstr * > IfThenElseStack;
       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
       if (MFI->ShaderType == 1) {
         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-            TII->get(AMDGPU::CF_CALL_FS));
+            getHWInstrDesc(CF_CALL_FS));
         CfCount++;
       }
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
         if (isFetch(I)) {
+          DEBUG(dbgs() << CfCount << ":"; I->dump(););
           I = MakeFetchClause(MBB, I, 0);
           CfCount++;
           continue;
@@ -144,16 +209,22 @@ public:
         case AMDGPU::CF_ALU_PUSH_BEFORE:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
-        case AMDGPU::KILLGT:
         case AMDGPU::CF_ALU:
+        case AMDGPU::EG_ExportBuf:
+        case AMDGPU::EG_ExportSwz:
+        case AMDGPU::R600_ExportBuf:
+        case AMDGPU::R600_ExportSwz:
+        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+          DEBUG(dbgs() << CfCount << ":"; MI->dump(););
           CfCount++;
           break;
         case AMDGPU::WHILELOOP: {
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::WHILE_LOOP))
-              .addImm(0);
+              getHWInstrDesc(CF_WHILE_LOOP))
+              .addImm(1);
           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
               std::set<MachineInstr *>());
           Pair.second.insert(MIb);
@@ -168,7 +239,7 @@ public:
               LoopStack.back();
           LoopStack.pop_back();
           CounterPropagateAddr(Pair.second, CfCount);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
               .addImm(Pair.first + 1);
           MI->eraseFromParent();
           CfCount++;
@@ -176,37 +247,40 @@ public:
         }
         case AMDGPU::IF_PREDICATE_SET: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_JUMP))
+              getHWInstrDesc(CF_JUMP))
               .addImm(0)
               .addImm(0);
-          std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb);
-          IfThenElseStack.push_back(Pair);
+          IfThenElseStack.push_back(MIb);
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
           MI->eraseFromParent();
           CfCount++;
           break;
         }
         case AMDGPU::ELSE: {
-          std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+          MachineInstr * JumpInst = IfThenElseStack.back();
           IfThenElseStack.pop_back();
-          CounterPropagateAddr(Pair.second, CfCount);
+          CounterPropagateAddr(JumpInst, CfCount);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_ELSE))
+              getHWInstrDesc(CF_ELSE))
               .addImm(0)
               .addImm(1);
-          std::pair<unsigned, MachineInstr *> NewPair(CfCount, MIb);
-          IfThenElseStack.push_back(NewPair);
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          IfThenElseStack.push_back(MIb);
           MI->eraseFromParent();
           CfCount++;
           break;
         }
         case AMDGPU::ENDIF: {
           CurrentStack--;
-          std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+          MachineInstr *IfOrElseInst = IfThenElseStack.back();
           IfThenElseStack.pop_back();
-          CounterPropagateAddr(Pair.second, CfCount + 1);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+          CounterPropagateAddr(IfOrElseInst, CfCount + 1);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_POP))
               .addImm(CfCount + 1)
               .addImm(1);
+          (void)MIb;
+          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
           MI->eraseFromParent();
           CfCount++;
           break;
@@ -214,13 +288,13 @@ public:
         case AMDGPU::PREDICATED_BREAK: {
           CurrentStack--;
           CfCount += 3;
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
               .addImm(CfCount)
               .addImm(1);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::LOOP_BREAK))
+              getHWInstrDesc(CF_LOOP_BREAK))
               .addImm(0);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
               .addImm(CfCount)
               .addImm(1);
           LoopStack.back().second.insert(MIb);
@@ -229,8 +303,8 @@ public:
         }
         case AMDGPU::CONTINUE: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_CONTINUE))
-              .addImm(CfCount);
+              getHWInstrDesc(CF_LOOP_CONTINUE))
+              .addImm(0);
           LoopStack.back().second.insert(MIb);
           MI->eraseFromParent();
           CfCount++;
@@ -261,4 +335,3 @@ char R600ControlFlowFinalizer::ID = 0;
 llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
   return new R600ControlFlowFinalizer(TM);
 }
-