[NVPTX] Directly control the Machine SSA passes that are invoked for NVPTX.

author Justin Holewinski <jholewinski@nvidia.com>

Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)

committer Justin Holewinski <jholewinski@nvidia.com>

Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)
author Justin Holewinski <jholewinski@nvidia.com>
Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)
committer Justin Holewinski <jholewinski@nvidia.com>
Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp

index 6e18322f824e9caf0a69244103f95f3bab81f236..069a1b9966f0dc9b2d5e62d972aa5958cd764cd5 100644 (file)
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -106,6 +106,7 @@ public:
    bool addInstSelector() override;
    bool addPreRegAlloc() override;
    bool addPostRegAlloc() override;
+  void addMachineSSAOptimization() override;
  
    FunctionPass *createTargetRegisterAllocator(bool) override;
    void addFastRegAlloc(FunctionPass *RegAllocPass) override;
@@ -207,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  
    printAndVerify("After StackSlotColoring");
  }
+
+void NVPTXPassConfig::addMachineSSAOptimization() {
+  // Pre-ra tail duplication.
+  if (addPass(&EarlyTailDuplicateID))
+    printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+  // Optimize PHIs before DCE: removing dead PHI cycles may make more
+  // instructions dead.
+  addPass(&OptimizePHIsID);
+
+  // This pass merges large allocas. StackSlotColoring is a different pass
+  // which merges spill slots.
+  addPass(&StackColoringID);
+
+  // If the target requests it, assign local variables to stack slots relative
+  // to one another and simplify frame index references where possible.
+  addPass(&LocalStackSlotAllocationID);
+
+  // With optimization, dead code should already be eliminated. However
+  // there is one known exception: lowered code for arguments that are only
+  // used by tail calls, where the tail calls reuse the incoming stack
+  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+  addPass(&DeadMachineInstructionElimID);
+  printAndVerify("After codegen DCE pass");
+
+  // Allow targets to insert passes that improve instruction level parallelism,
+  // like if-conversion. Such passes will typically need dominator trees and
+  // loop info, just like LICM and CSE below.
+  if (addILPOpts())
+    printAndVerify("After ILP optimizations");
+
+  addPass(&MachineLICMID);
+  addPass(&MachineCSEID);
+
+  addPass(&MachineSinkingID);
+  printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+  addPass(&PeepholeOptimizerID);
+  printAndVerify("After codegen peephole optimization pass");
+}
author	Justin Holewinski <jholewinski@nvidia.com>
	Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)
committer	Justin Holewinski <jholewinski@nvidia.com>
	Fri, 27 Jun 2014 18:35:14 +0000 (18:35 +0000)