On DataLayout, omit the default of p:64:64:64.
[oota-llvm.git] / lib / Target / R600 / R600ControlFlowFinalizer.cpp
index b69d38bbdddfed818b2243a369c2ab00df751a74..ec39e097ba951fab3c9fc2a6fc15f5cf04b00298 100644 (file)
@@ -332,6 +332,7 @@ public:
 
     unsigned MaxStack = 0;
     unsigned CurrentStack = 0;
+    unsigned CurrentLoopDepth = 0;
     bool HasPush = false;
     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
@@ -370,21 +371,22 @@ public:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
           HasPush = true;
+          if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
+            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
+                .addImm(CfCount + 1)
+                .addImm(1);
+            MI->setDesc(TII->get(AMDGPU::CF_ALU));
+            CfCount++;
+          }
         case AMDGPU::CF_ALU:
           I = MI;
           AluClauses.push_back(MakeALUClause(MBB, I));
-        case AMDGPU::EG_ExportBuf:
-        case AMDGPU::EG_ExportSwz:
-        case AMDGPU::R600_ExportBuf:
-        case AMDGPU::R600_ExportSwz:
-        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
-        case AMDGPU::RAT_STORE_DWORD_cm:
           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
           CfCount++;
           break;
         case AMDGPU::WHILELOOP: {
           CurrentStack+=4;
+          CurrentLoopDepth++;
           MaxStack = std::max(MaxStack, CurrentStack);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
               getHWInstrDesc(CF_WHILE_LOOP))
@@ -399,6 +401,7 @@ public:
         }
         case AMDGPU::ENDLOOP: {
           CurrentStack-=4;
+          CurrentLoopDepth--;
           std::pair<unsigned, std::set<MachineInstr *> > Pair =
               LoopStack.back();
           LoopStack.pop_back();
@@ -457,18 +460,11 @@ public:
           MI->eraseFromParent();
           break;
         }
-        case AMDGPU::PREDICATED_BREAK: {
-          CurrentStack--;
-          CfCount += 3;
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
-              .addImm(CfCount)
-              .addImm(1);
+        case AMDGPU::BREAK: {
+          CfCount ++;
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
               getHWInstrDesc(CF_LOOP_BREAK))
               .addImm(0);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
-              .addImm(CfCount)
-              .addImm(1);
           LoopStack.back().second.insert(MIb);
           MI->eraseFromParent();
           break;
@@ -496,6 +492,10 @@ public:
             EmitALUClause(I, AluClauses[i], CfCount);
         }
         default:
+          if (TII->isExport(MI->getOpcode())) {
+            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+            CfCount++;
+          }
           break;
         }
       }