1 //===- CleanupGCCOutput.cpp - Cleanup GCC Output --------------------------===//
3 // This pass is used to cleanup the output of GCC. GCC's output is
4 // unneccessarily gross for a couple of reasons. This pass does the following
5 // things to try to clean it up:
7 // * Eliminate names for GCC types that we know can't be needed by the user.
8 // * Eliminate names for types that are unused in the entire translation unit
9 // * Fix various problems that we might have in PHI nodes and casts
11 // Note: This code produces dead declarations, it is a good idea to run DCE
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/CleanupGCCOutput.h"
17 #include "llvm/Analysis/FindUsedTypes.h"
18 #include "llvm/Module.h"
19 #include "llvm/SymbolTable.h"
20 #include "llvm/DerivedTypes.h"
21 #include "llvm/iPHINode.h"
22 #include "llvm/iMemory.h"
23 #include "llvm/iTerminators.h"
24 #include "llvm/iOther.h"
25 #include "llvm/Support/CFG.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "Support/StatisticReporter.h"
31 static Statistic<> NumTypeSymtabEntriesKilled("cleangcc\t- Number of unused typenames removed from symtab");
32 static Statistic<> NumCastsMoved("cleangcc\t- Number of casts removed from head of basic block");
33 static Statistic<> NumRefactoredPreds("cleangcc\t- Number of predecessor blocks refactored");
38 struct CleanupGCCOutput : public FunctionPass {
39 const char *getPassName() const { return "Cleanup GCC Output"; }
41 // doPassInitialization - For this pass, it removes global symbol table
42 // entries for primitive types. These are never used for linking in GCC and
43 // they make the output uglier to look at, so we nuke them.
45 // Also, initialize instance variables.
47 bool doInitialization(Module *M);
49 // runOnFunction - This method simplifies the specified function hopefully.
51 bool runOnFunction(Function *F);
53 // doPassFinalization - Strip out type names that are unused by the program
54 bool doFinalization(Module *M);
56 // getAnalysisUsage - This function needs FindUsedTypes to do its job...
58 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
59 AU.addRequired(FindUsedTypes::ID);
64 Pass *createCleanupGCCOutputPass() {
65 return new CleanupGCCOutput();
70 // ShouldNukSymtabEntry - Return true if this module level symbol table entry
71 // should be eliminated.
73 static inline bool ShouldNukeSymtabEntry(const std::pair<std::string,Value*>&E){
74 // Nuke all names for primitive types!
75 if (cast<Type>(E.second)->isPrimitiveType()) return true;
77 // Nuke all pointers to primitive types as well...
78 if (const PointerType *PT = dyn_cast<PointerType>(E.second))
79 if (PT->getElementType()->isPrimitiveType()) return true;
84 // doInitialization - For this pass, it removes global symbol table
85 // entries for primitive types. These are never used for linking in GCC and
86 // they make the output uglier to look at, so we nuke them.
88 bool CleanupGCCOutput::doInitialization(Module *M) {
91 if (M->hasSymbolTable()) {
92 SymbolTable *ST = M->getSymbolTable();
94 // Check the symbol table for superfluous type entries...
96 // Grab the 'type' plane of the module symbol...
97 SymbolTable::iterator STI = ST->find(Type::TypeTy);
98 if (STI != ST->end()) {
99 // Loop over all entries in the type plane...
100 SymbolTable::VarMap &Plane = STI->second;
101 for (SymbolTable::VarMap::iterator PI = Plane.begin(); PI != Plane.end();)
102 if (ShouldNukeSymtabEntry(*PI)) { // Should we remove this entry?
103 #if MAP_IS_NOT_BRAINDEAD
104 PI = Plane.erase(PI); // STD C++ Map should support this!
106 Plane.erase(PI); // Alas, GCC 2.95.3 doesn't *SIGH*
109 ++NumTypeSymtabEntriesKilled;
121 // FixCastsAndPHIs - The LLVM GCC has a tendancy to intermix Cast instructions
122 // in with the PHI nodes. These cast instructions are potentially there for two
123 // different reasons:
125 // 1. The cast could be for an early PHI, and be accidentally inserted before
126 // another PHI node. In this case, the PHI node should be moved to the end
127 // of the PHI nodes in the basic block. We know that it is this case if
128 // the source for the cast is a PHI node in this basic block.
130 // 2. If not #1, the cast must be a source argument for one of the PHI nodes
131 // in the current basic block. If this is the case, the cast should be
132 // lifted into the basic block for the appropriate predecessor.
134 static inline bool FixCastsAndPHIs(BasicBlock *BB) {
135 bool Changed = false;
137 BasicBlock::iterator InsertPos = BB->begin();
139 // Find the end of the interesting instructions...
140 while (isa<PHINode>(*InsertPos) || isa<CastInst>(*InsertPos)) ++InsertPos;
142 // Back the InsertPos up to right after the last PHI node.
143 while (InsertPos != BB->begin() && isa<CastInst>(*(InsertPos-1))) --InsertPos;
145 // No PHI nodes, quick exit.
146 if (InsertPos == BB->begin()) return false;
148 // Loop over all casts trapped between the PHI's...
149 BasicBlock::iterator I = BB->begin();
150 while (I != InsertPos) {
151 if (CastInst *CI = dyn_cast<CastInst>(*I)) { // Fix all cast instructions
152 Value *Src = CI->getOperand(0);
154 // Move the cast instruction to the current insert position...
155 --InsertPos; // New position for cast to go...
156 std::swap(*InsertPos, *I); // Cast goes down, PHI goes up
161 if (isa<PHINode>(Src) && // Handle case #1
162 cast<PHINode>(Src)->getParent() == BB) {
163 // We're done for case #1
164 } else { // Handle case #2
165 // In case #2, we have to do a few things:
166 // 1. Remove the cast from the current basic block.
167 // 2. Identify the PHI node that the cast is for.
168 // 3. Find out which predecessor the value is for.
169 // 4. Move the cast to the end of the basic block that it SHOULD be
172 // Remove the cast instruction from the basic block. The remove only
173 // invalidates iterators in the basic block that are AFTER the removed
174 // element. Because we just moved the CastInst to the InsertPos, no
175 // iterators get invalidated.
177 BB->getInstList().remove(InsertPos);
179 // Find the PHI node. Since this cast was generated specifically for a
180 // PHI node, there can only be a single PHI node using it.
182 assert(CI->use_size() == 1 && "Exactly one PHI node should use cast!");
183 PHINode *PN = cast<PHINode>(*CI->use_begin());
185 // Find out which operand of the PHI it is...
187 for (i = 0; i < PN->getNumIncomingValues(); ++i)
188 if (PN->getIncomingValue(i) == CI)
190 assert(i != PN->getNumIncomingValues() && "PHI doesn't use cast!");
192 // Get the predecessor the value is for...
193 BasicBlock *Pred = PN->getIncomingBlock(i);
195 // Reinsert the cast right before the terminator in Pred.
196 Pred->getInstList().insert(Pred->end()-1, CI);
207 // RefactorPredecessor - When we find out that a basic block is a repeated
208 // predecessor in a PHI node, we have to refactor the function until there is at
209 // most a single instance of a basic block in any predecessor list.
211 static inline void RefactorPredecessor(BasicBlock *BB, BasicBlock *Pred) {
212 Function *M = BB->getParent();
213 assert(find(pred_begin(BB), pred_end(BB), Pred) != pred_end(BB) &&
214 "Pred is not a predecessor of BB!");
216 // Create a new basic block, adding it to the end of the function.
217 BasicBlock *NewBB = new BasicBlock("", M);
219 // Add an unconditional branch to BB to the new block.
220 NewBB->getInstList().push_back(new BranchInst(BB));
222 // Get the terminator that causes a branch to BB from Pred.
223 TerminatorInst *TI = Pred->getTerminator();
225 // Find the first use of BB in the terminator...
226 User::op_iterator OI = find(TI->op_begin(), TI->op_end(), BB);
227 assert(OI != TI->op_end() && "Pred does not branch to BB!!!");
229 // Change the use of BB to point to the new stub basic block
232 // Now we need to loop through all of the PHI nodes in BB and convert their
233 // first incoming value for Pred to reference the new basic block instead.
235 for (BasicBlock::iterator I = BB->begin();
236 PHINode *PN = dyn_cast<PHINode>(*I); ++I) {
237 int BBIdx = PN->getBasicBlockIndex(Pred);
238 assert(BBIdx != -1 && "PHI node doesn't have an entry for Pred!");
240 // The value that used to look like it came from Pred now comes from NewBB
241 PN->setIncomingBlock((unsigned)BBIdx, NewBB);
246 // runOnFunction - Loop through the function and fix problems with the PHI nodes
247 // in the current function. The problem is that PHI nodes might exist with
248 // multiple entries for the same predecessor. GCC sometimes generates code that
251 // bb7: br bool %cond1004, label %bb8, label %bb8
252 // bb8: %reg119 = phi uint [ 0, %bb7 ], [ 1, %bb7 ]
254 // which is completely illegal LLVM code. To compensate for this, we insert
255 // an extra basic block, and convert the code to look like this:
257 // bb7: br bool %cond1004, label %bbX, label %bb8
259 // bb8: %reg119 = phi uint [ 0, %bbX ], [ 1, %bb7 ]
262 bool CleanupGCCOutput::runOnFunction(Function *M) {
263 bool Changed = false;
264 // Don't use iterators because invalidation gets messy...
265 for (unsigned MI = 0; MI < M->size(); ++MI) {
266 BasicBlock *BB = M->getBasicBlocks()[MI];
268 Changed |= FixCastsAndPHIs(BB);
270 if (isa<PHINode>(BB->front())) {
271 const vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
273 // Handle the problem. Sort the list of predecessors so that it is easy
274 // to decide whether or not duplicate predecessors exist.
275 vector<BasicBlock*> SortedPreds(Preds);
276 sort(SortedPreds.begin(), SortedPreds.end());
278 // Loop over the predecessors, looking for adjacent BB's that are equal.
279 BasicBlock *LastOne = 0;
280 for (unsigned i = 0; i < Preds.size(); ++i) {
281 if (SortedPreds[i] == LastOne) { // Found a duplicate.
282 RefactorPredecessor(BB, SortedPreds[i]);
283 ++NumRefactoredPreds;
286 LastOne = SortedPreds[i];
293 bool CleanupGCCOutput::doFinalization(Module *M) {
294 bool Changed = false;
296 if (M->hasSymbolTable()) {
297 SymbolTable *ST = M->getSymbolTable();
298 const std::set<const Type *> &UsedTypes =
299 getAnalysis<FindUsedTypes>().getTypes();
301 // Check the symbol table for superfluous type entries that aren't used in
304 // Grab the 'type' plane of the module symbol...
305 SymbolTable::iterator STI = ST->find(Type::TypeTy);
306 if (STI != ST->end()) {
307 // Loop over all entries in the type plane...
308 SymbolTable::VarMap &Plane = STI->second;
309 for (SymbolTable::VarMap::iterator PI = Plane.begin(); PI != Plane.end();)
310 if (!UsedTypes.count(cast<Type>(PI->second))) {
311 #if MAP_IS_NOT_BRAINDEAD
312 PI = Plane.erase(PI); // STD C++ Map should support this!
314 Plane.erase(PI); // Alas, GCC 2.95.3 doesn't *SIGH*
315 PI = Plane.begin(); // N^2 algorithms are fun. :(