X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=docs%2FPasses.html;h=8c086720a8bb731b8530b5e08ea20c1d007e2fbe;hb=beaec4cbf6cfd756084eff1ee2ad18ee17c33284;hp=fb1359ff85897b4853d678547d85757054c797b2;hpb=0e15dc26ec9fccbde6091309434da4004d9d14ae;p=oota-llvm.git diff --git a/docs/Passes.html b/docs/Passes.html index fb1359ff858..8c086720a8b 100644 --- a/docs/Passes.html +++ b/docs/Passes.html @@ -79,15 +79,14 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -basicaaBasic Alias Analysis (default AA impl) -basiccgBasic CallGraph Construction -basicvnBasic Value Numbering (default GVN impl) --callgraphPrint a call graph --callsccPrint SCCs of the Call Graph --cfgsccPrint SCCs of each function CFG -codegenprepareOptimize for code generation -count-aaCount Alias Analysis Query Responses -debug-aaAA use debugger -domfrontierDominance Frontier Construction -domtreeDominator Tree Construction --externalfnconstantsPrint external fn callsites passed constants +-dot-callgraphPrint Call Graph to 'dot' file +-dot-cfgPrint CFG of function to 'dot' file +-dot-cfg-onlyPrint CFG of function to 'dot' file (with no function bodies) -globalsmodref-aaSimple mod/ref analysis for globals -instcountCounts the various types of Instructions -intervalsInterval Partition Construction @@ -98,13 +97,14 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -no-profileNo Profile Information -postdomfrontierPost-Dominance Frontier Construction -postdomtreePost-Dominator Tree Construction --printPrint function to stderr -print-alias-setsAlias Set Printer --print-callgraphPrint Call Graph to 'dot' file --print-cfgPrint CFG of function to 'dot' file --print-cfg-onlyPrint CFG of function to 'dot' file (with no function bodies) --printmPrint module to stderr --printusedtypesFind Used Types +-print-callgraphPrint a call graph +-print-callgraph-sccsPrint SCCs of the Call Graph +-print-cfg-sccsPrint SCCs of each function CFG +-print-externalfnconstantsPrint external fn callsites passed constants +-print-functionPrint function to stderr +-print-modulePrint module to stderr +-print-used-typesFind Used Types -profile-loaderLoad profile information from llvmprof.out -scalar-evolutionScalar Evolution Analysis -targetdataTarget Data Layout @@ -116,7 +116,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -argpromotionPromote 'by reference' arguments to scalars -block-placementProfile Guided Basic Block Placement -break-crit-edgesBreak critical edges in CFG --ceeCorrelated Expression Elimination +-codegenpreparePrepare a function for code generation -condpropConditional Propagation -constmergeMerge Duplicate Global Constants -constpropSimple constant propagation @@ -142,8 +142,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -internalizeInternalize Global Symbols -ipconstpropInterprocedural constant propagation -ipsccpInterprocedural Sparse Conditional Constant Propagation +-jump-threadingThread control through conditional blocks -lcssaLoop-Closed SSA Form Pass -licmLoop Invariant Code Motion +-loop-deletionDead Loop Deletion Pass -loop-extractExtract loops into new functions -loop-extract-singleExtract at most one loop into a new function -loop-index-splitIndex Split Loops @@ -152,14 +154,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -loop-unrollUnroll loops -loop-unswitchUnswitch loops -loopsimplifyCanonicalize natural loops --lower-packedlowers packed operations to operations on smaller packed datatypes -lowerallocsLower allocations from instructions to calls --lowergcLower GC intrinsics, for GCless code generators -lowerinvokeLower invoke and unwind, for unwindless code generators --lowerselectLower select instructions to branches -lowersetjmpLower Set Jump -lowerswitchLower SwitchInst's to branches -mem2regPromote Memory to Register +-memcpyoptOptimize use of memcpy and friends -mergereturnUnify function exit nodes -predsimplifyPredicate Simplifier -prune-ehRemove unused exception handling info @@ -171,6 +171,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! -simplify-libcallsSimplify well-known library calls -simplifycfgSimplify the CFG -stripStrip all symbols from a module +-strip-dead-prototypesRemove unused function declarations +-sretpromotionPromote sret arguments -tailcallelimTail Call Elimination -tailduplicateTail Duplication @@ -179,7 +181,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! OptionName -deadarghaX0rDead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE) -extract-blocksExtract Basic Blocks From Module (for bugpoint use) --emitbitcodeBitcode Writer +-preverifyPreliminary module verification -verifyModule Verifier -view-cfgView CFG of function -view-cfg-onlyView CFG of function (with no function bodies) @@ -296,7 +298,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if !

- Basic Value Numbering (default GVN impl) + Basic Value Numbering (default Value Numbering impl)

@@ -305,39 +307,11 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! lexically identical expressions. This does not require any ahead of time analysis, so it is a very fast default implementation.

-
- - -
- Print a call graph -
-
-

- This pass, only available in opt, prints - the call graph into a .dot graph. This graph can then be processed with the - "dot" tool to convert it to postscript or some other suitable format. -

-
- - -
- Print SCCs of the Call Graph -
-

- This pass, only available in opt, prints - the SCCs of the call graph to standard output in a human-readable form. -

-
- - -
- Print SCCs of each function CFG -
-
-

- This pass, only available in opt, prints - the SCCs of each function CFG to standard output in a human-readable form. + The ValueNumbering analysis passes are mostly deprecated. They are only used + by the Global Common Subexpression Elimination pass, which + is deprecated by the Global Value Numbering pass (which + does its value numbering on its own).

@@ -405,14 +379,38 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if !

- Print external fn callsites passed constants + Print Call Graph to 'dot' file

- This pass, only available in opt, prints out call sites to - external functions that are called with constant arguments. This can be - useful when looking for standard library functions we should constant fold - or handle in alias analyses. + This pass, only available in opt, prints the call graph into a + .dot graph. This graph can then be processed with the "dot" tool + to convert it to postscript or some other suitable format. +

+
+ + +
+ Print CFG of function to 'dot' file +
+
+

+ This pass, only available in opt, prints the control flow graph + into a .dot graph. This graph can then be processed with the + "dot" tool to convert it to postscript or some other suitable format. +

+
+ + +
+ Print CFG of function to 'dot' file (with no function bodies) +
+
+

+ This pass, only available in opt, prints the control flow graph + into a .dot graph, omitting the function bodies. This graph can + then be processed with the "dot" tool to convert it to postscript or some + other suitable format.

@@ -495,7 +493,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Memory Dependence Analysis

-

Yet to be written.

+

+ An analysis that determines, for a given memory operation, what preceding + memory operations it depends on. It builds on alias analysis information, and + tries to provide a lazy, caching interface to a common kind of alias + information query. +

@@ -503,7 +506,11 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! No Alias Analysis (always returns 'may' alias)

-

Yet to be written.

+

+ Always returns "I don't know" for alias queries. NoAA is unlike other alias + analysis implementations, in that it does not chain to a previous analysis. As + such it doesn't follow many of the rules that other alias analyses must. +

@@ -511,7 +518,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! No Profile Information

-

Yet to be written.

+

+ The default "no profile" implementation of the abstract + ProfileInfo interface. +

@@ -519,7 +529,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Post-Dominance Frontier Construction

-

Yet to be written.

+

+ This pass is a simple post-dominator construction algorithm for finding + post-dominator frontiers. +

@@ -527,12 +540,15 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Post-Dominator Tree Construction

-

Yet to be written.

+

+ This pass is a simple post-dominator construction algorithm for finding + post-dominators. +

- Print function to stderr + Alias Set Printer

Yet to be written.

@@ -540,50 +556,81 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if !

- Alias Set Printer + Print a call graph
-

Yet to be written.

+

+ This pass, only available in opt, prints the call graph to + standard output in a human-readable form. +

- Print Call Graph to 'dot' file + Print SCCs of the Call Graph
-

Yet to be written.

+

+ This pass, only available in opt, prints the SCCs of the call + graph to standard output in a human-readable form. +

- Print CFG of function to 'dot' file + Print SCCs of each function CFG
-

Yet to be written.

+

+ This pass, only available in opt, prints the SCCs of each + function CFG to standard output in a human-readable form. +

- Print CFG of function to 'dot' file (with no function bodies) + Print external fn callsites passed constants
-

Yet to be written.

+

+ This pass, only available in opt, prints out call sites to + external functions that are called with constant arguments. This can be + useful when looking for standard library functions we should constant fold + or handle in alias analyses. +

- Print module to stderr + Print function to stderr
-

Yet to be written.

+

+ The PrintFunctionPass class is designed to be pipelined with + other FunctionPasses, and prints out the functions of the module + as they are processed. +

- Find Used Types + Print module to stderr
-

Yet to be written.

+

+ This pass simply prints out the entire module when it is executed. +

+
+ + +
+ Find Used Types +
+
+

+ This pass is used to seek out all of the types in use by the program. Note + that this analysis explicitly does not include types only used by the symbol + table.

@@ -591,7 +638,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Load profile information from llvmprof.out

-

Yet to be written.

+

+ A concrete implementation of profiling information that loads the information + from a profile dump file. +

@@ -599,7 +649,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Scalar Evolution Analysis

-

Yet to be written.

+

+ The ScalarEvolution analysis can be used to analyze and + categorize scalar expressions in loops. It specializes in recognizing general + induction variables, representing them with the abstract and opaque + SCEV class. Given this analysis, trip counts of loops and other + important properties can be obtained. +

+ +

+ This analysis is primarily useful for induction variable substitution and + strength reduction. +

@@ -607,7 +668,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Target Data Layout

-

Yet to be written.

+

Provides other passes access to information on the size and alignment + required by the target ABI for various data types.

@@ -632,7 +694,30 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Promote 'by reference' arguments to scalars

-

Yet to be written.

+

+ This pass promotes "by reference" arguments to be "by value" arguments. In + practice, this means looking for internal functions that have pointer + arguments. If it can prove, through the use of alias analysis, that an + argument is *only* loaded, then it can pass the value into the function + instead of the address of the value. This can cause recursive simplification + of code and lead to the elimination of allocas (especially in C++ template + code like the STL). +

+ +

+ This pass also handles aggregate arguments that are passed into a function, + scalarizing them if the elements of the aggregate are only loaded. Note that + it refuses to scalarize aggregates which would require passing in more than + three operands to the function, because passing thousands of operands for a + large array or structure is unprofitable! +

+ +

+ Note that this transformation could also be done for arguments that are only + stored to (returning the value instead), but does not currently. This case + would be best handled when and if LLVM starts supporting multiple return + values from functions. +

@@ -640,22 +725,11 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Profile Guided Basic Block Placement

-

This pass implements a very simple profile guided basic block placement - algorithm. The idea is to put frequently executed blocks together at the - start of the function, and hopefully increase the number of fall-through - conditional branches. If there is no profile information for a particular - function, this pass basically orders blocks in depth-first order.

-

The algorithm implemented here is basically "Algo1" from "Profile Guided - Code Positioning" by Pettis and Hansen, except that it uses basic block - counts instead of edge counts. This could be improved in many ways, but is - very simple for now.

- -

Basically we "place" the entry block, then loop over all successors in a - DFO, placing the most frequently executed successor until we run out of - blocks. Did we mention that this was extremely simplistic? This is - also much slower than it could be. When it becomes important, this pass - will be rewritten to use a better algorithm, and then we can worry about - efficiency.

+

This pass is a very simple profile guided basic block placement algorithm. + The idea is to put frequently executed blocks together at the start of the + function and hopefully increase the number of fall-through conditional + branches. If there is no profile information for a particular function, this + pass basically orders blocks in depth-first order.

@@ -663,32 +737,22 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

\n" if ! Break critical edges in CFG

-

Yet to be written.

+

+ Break all of the critical edges in the CFG by inserting a dummy basic block. + It may be "required" by passes that cannot deal with critical edges. This + transformation obviously invalidates the CFG, but can update forward dominator + (set, immediate dominators, tree, and frontier) information. +

- Correlated Expression Elimination + Prepare a function for code generation
-

Correlated Expression Elimination propagates information from conditional - branches to blocks dominated by destinations of the branch. It propagates - information from the condition check itself into the body of the branch, - allowing transformations like these for example:

- -
-if (i == 7)
-  ... 4*i;  // constant propagation
-
-M = i+1; N = j+1;
-if (i == j)
-  X = M-N;  // = M-M == 0;
-
- -

This is called Correlated Expression Elimination because we eliminate or - simplify expressions that are correlated with the direction of a branch. In - this way we use static information to give us some information about the - dynamic value of a variable.

+ This pass munges the code in the input function to better prepare it for + SelectionDAG-based code generation. This works around limitations in its + basic-block-at-a-time approach. It should eventually be removed.
@@ -705,7 +769,12 @@ if (i == j) Merge Duplicate Global Constants
-

Yet to be written.

+

+ Merges duplicate global constants together into a single constant that is + shared. This is useful because some passes (e.g. TraceValues) insert a lot of + string constants into the program, regardless of whether or not an existing + string is available. +

@@ -729,7 +798,11 @@ if (i == j) Dead Code Elimination
-

Yet to be written.

+

+ Dead code elimination is similar to dead instruction + elimination, but it rechecks instructions that were used by removed + instructions to see if they are newly dead. +

@@ -737,7 +810,17 @@ if (i == j) Dead Argument Elimination
-

Yet to be written.

+

+ This pass deletes dead arguments from internal functions. Dead argument + elimination removes arguments which are directly dead, as well as arguments + only passed into function calls as dead arguments of other functions. This + pass also deletes dead return values in a similar way. +

+ +

+ This pass is often useful as a cleanup pass to run after aggressive + interprocedural passes, which add possibly-dead arguments. +

@@ -745,7 +828,11 @@ if (i == j) Dead Type Elimination
-

Yet to be written.

+

+ This pass is used to clean up the output of GCC. It eliminates names for types + that are unused in the entire translation unit, using the find used types pass. +

@@ -753,7 +840,10 @@ if (i == j) Dead Instruction Elimination
-

Yet to be written.

+

+ Dead instruction elimination performs a single pass over the function, + removing instructions that are obviously dead. +

@@ -761,7 +851,10 @@ if (i == j) Dead Store Elimination
-

Yet to be written.

+

+ A trivial dead store elimination that only considers basic-block local + redundant stores. +

@@ -769,7 +862,16 @@ if (i == j) Global Common Subexpression Elimination
-

Yet to be written.

+

+ This pass is designed to be a very quick global transformation that + eliminates global common subexpressions from a function. It does this by + using an existing value numbering analysis pass to identify the common + subexpressions, eliminating them when possible. +

+

+ This pass is deprecated by the Global Value Numbering pass + (which does a better job with its own value numbering). +

@@ -777,7 +879,13 @@ if (i == j) Dead Global Elimination
-

Yet to be written.

+

+ This transform is designed to eliminate unreachable internal globals from the + program. It uses an aggressive algorithm, searching out globals that are + known to be alive. After it finds all of the globals which are needed, it + deletes whatever is left over. This allows it to delete recursive chunks of + the program which are unreachable. +

@@ -785,7 +893,11 @@ if (i == j) Global Variable Optimizer
-

Yet to be written.

+

+ This pass transforms simple global variables that never have their address + taken. If obviously true, it marks read/write globals as constant, deletes + variables only stored to, etc. +

@@ -797,6 +909,10 @@ if (i == j) This pass performs global value numbering to eliminate fully redundant instructions. It also performs simple dead load elimination.

+

+ Note that this pass does the value numbering itself; it does not use the + ValueNumbering analysis passes. +

@@ -814,6 +930,10 @@ if (i == j) live ranges, and should be used with caution on platforms that are very sensitive to register pressure.

+

+ Note that this pass does the value numbering itself; it does not use the + ValueNumbering analysis passes. +

@@ -821,7 +941,16 @@ if (i == j) Indirect Malloc and Free Removal
-

Yet to be written.

+

+ This pass finds places where memory allocation functions may escape into + indirect land. Some transforms are much easier (aka possible) only if free + or malloc are not called indirectly. +

+ +

+ Thus, this pass finds places where the addresses of memory functions are taken and constructs + bounce functions with direct calls of those functions. +

@@ -829,7 +958,50 @@ if (i == j) Canonicalize Induction Variables
-

Yet to be written.

+

+ This transformation analyzes and transforms the induction variables (and + computations derived from them) into simpler forms suitable for subsequent + analysis and transformation. +

+ +

+ This transformation makes the following changes to each loop with an + identifiable induction variable: +

+ +
    +
  1. All loops are transformed to have a single canonical + induction variable which starts at zero and steps by one.
  2. +
  3. The canonical induction variable is guaranteed to be the first PHI node + in the loop header block.
  4. +
  5. Any pointer arithmetic recurrences are raised to use array + subscripts.
  6. +
+ +

+ If the trip count of a loop is computable, this pass also makes the following + changes: +

+ +
    +
  1. The exit condition for the loop is canonicalized to compare the + induction value against the exit value. This turns loops like: +
    for (i = 7; i*i < 1000; ++i)
    + into +
    for (i = 0; i != 25; ++i)
  2. +
  3. Any use outside of the loop of an expression derived from the indvar + is changed to compute the derived value outside of the loop, eliminating + the dependence on the exit value of the induction variable. If the only + purpose of the loop is to compute the exit value of some derived + expression, this transformation will make the loop dead.
  4. +
+ +

+ This transformation should be followed by strength reduction after all of the + desired loop transformations have been performed. Additionally, on targets + where it is profitable, the loop could be transformed to count down to zero + (the "do loop" optimization). +

@@ -837,7 +1009,9 @@ if (i == j) Function Integration/Inlining
-

Yet to be written.

+

+ Bottom-up inlining of functions into callers. +

@@ -845,7 +1019,18 @@ if (i == j) Insert instrumentation for block profiling
-

Yet to be written.

+

+ This pass instruments the specified program with counters for basic block + profiling, which counts the number of times each basic block executes. This + is the most basic form of profiling, which can tell which blocks are hot, but + cannot reliably detect hot paths through the CFG. +

+ +

+ Note that this implementation is very naïve. Control equivalent regions of + the CFG should not require duplicate counters, but it does put duplicate + counters in. +

@@ -853,7 +1038,17 @@ if (i == j) Insert instrumentation for edge profiling
-

Yet to be written.

+

+ This pass instruments the specified program with counters for edge profiling. + Edge profiling can give a reasonable approximation of the hot paths through a + program, and is used for a wide variety of program transformations. +

+ +

+ Note that this implementation is very naïve. It inserts a counter for + every edge in the program, instead of using control flow information + to prune the number of counters inserted. +

@@ -861,7 +1056,10 @@ if (i == j) Insert instrumentation for function profiling
-

Yet to be written.

+

+ This pass instruments the specified program with counters for function + profiling, which counts the number of times each function is called. +

@@ -869,7 +1067,11 @@ if (i == j) Measure profiling framework overhead
-

Yet to be written.

+

+ The basic profiler that does nothing. It is the default profiler and thus + terminates RSProfiler chains. It is useful for measuring + framework overhead. +

@@ -877,7 +1079,20 @@ if (i == j) Insert random sampling instrumentation framework
-

Yet to be written.

+

+ The second stage of the random-sampling instrumentation framework, duplicates + all instructions in a function, ignoring the profiling code, then connects the + two versions together at the entry and at backedges. At each connection point + a choice is made as to whether to jump to the profiled code (take a sample) or + execute the unprofiled code. +

+ +

+ After this pass, it is highly recommended to run mem2reg + and adce. instcombine, + load-vn, gdce, and + dse also are good to run afterwards. +

@@ -885,7 +1100,53 @@ if (i == j) Combine redundant instructions
-

Yet to be written.

+

+ Combine instructions to form fewer, simple + instructions. This pass does not modify the CFG. This pass is where algebraic + simplification happens. +

+ +

+ This pass combines things like: +

+ +
%Y = add i32 %X, 1
+%Z = add i32 %Y, 1
+ +

+ into: +

+ +
%Z = add i32 %X, 2
+ +

+ This is a simple worklist driven algorithm. +

+ +

+ This pass guarantees that the following canonicalizations are performed on + the program: +

+ +
@@ -893,7 +1154,11 @@ if (i == j) Internalize Global Symbols
-

Yet to be written.

+

+ This pass loops over all of the functions in the input module, looking for a + main function. If a main function is found, all other functions and all + global variables with initializers are marked as internal. +

@@ -901,7 +1166,13 @@ if (i == j) Interprocedural constant propagation
-

Yet to be written.

+

+ This pass implements an extremely simple interprocedural constant + propagation pass. It could certainly be improved in many different ways, + like using a worklist. This pass makes arguments dead, but does not remove + them. The existing dead argument elimination pass should be run after this + to clean up the mess. +

@@ -909,7 +1180,39 @@ if (i == j) Interprocedural Sparse Conditional Constant Propagation
-

Yet to be written.

+

+ An interprocedural variant of Sparse Conditional Constant + Propagation. +

+
+ + +
+ Thread control through conditional blocks +
+
+

+ Jump threading tries to find distinct threads of control flow running through + a basic block. This pass looks at blocks that have multiple predecessors and + multiple successors. If one or more of the predecessors of the block can be + proven to always cause a jump to one of the successors, we forward the edge + from the predecessor to the successor by duplicating the contents of this + block. +

+

+ An example of when this can occur is code like this: +

+ +
if () { ...
+  X = 4;
+}
+if (X < 3) {
+ +

+ In this case, the unconditional branch at the end of the first if can be + revectored to the false side of the second if. +

@@ -917,7 +1220,28 @@ if (i == j) Loop-Closed SSA Form Pass
-

Yet to be written.

+

+ This pass transforms loops by placing phi nodes at the end of the loops for + all values that are live across the loop boundary. For example, it turns + the left into the right code: +

+ +
for (...)                for (...)
+  if (c)                   if (c)
+    X1 = ...                 X1 = ...
+  else                     else
+    X2 = ...                 X2 = ...
+  X3 = phi(X1, X2)         X3 = phi(X1, X2)
+... = X3 + 4              X4 = phi(X3)
+                          ... = X4 + 4
+ +

+ This is still valid LLVM; the extra phi nodes are purely redundant, and will + be trivially eliminated by InstCombine. The major benefit of + this transformation is that it makes many other loop optimizations, such as + LoopUnswitching, simpler. +

@@ -925,7 +1249,48 @@ if (i == j) Loop Invariant Code Motion
-

Yet to be written.

+

+ This pass performs loop invariant code motion, attempting to remove as much + code from the body of a loop as possible. It does this by either hoisting + code into the preheader block, or by sinking code to the exit blocks if it is + safe. This pass also promotes must-aliased memory locations in the loop to + live in registers, thus hoisting and sinking "invariant" loads and stores. +

+ +

+ This pass uses alias analysis for two purposes: +

+ + +
+ +
+ Dead Loop Deletion Pass +
+
+

+ This file implements the Dead Loop Deletion Pass. This pass is responsible + for eliminating loops with non-infinite computable trip counts that have no + side effects or volatile instructions, and do not contribute to the + computation of the function's return value. +

@@ -933,7 +1298,12 @@ if (i == j) Extract loops into new functions
-

Yet to be written.

+

+ A pass wrapper around the ExtractLoop() scalar transformation to + extract each top-level loop into its own new function. If the loop is the + only loop in a given function, it is not touched. This is a pass most + useful for debugging via bugpoint. +

@@ -941,7 +1311,11 @@ if (i == j) Extract at most one loop into a new function
-

Yet to be written.

+

+ Similar to Extract loops into new functions, + this pass extracts one natural loop from the program into a function if it + can. This is used by bugpoint. +

@@ -949,7 +1323,10 @@ if (i == j) Index Split Loops
-

Yet to be written.

+

+ This pass divides a loop's iteration range by splitting the loop such that each + individual loop is executed efficiently. +

@@ -957,7 +1334,13 @@ if (i == j) Loop Strength Reduction
-

Yet to be written.

+

+ This pass performs a strength reduction on array references inside loops that + have as one or more of their components the loop induction variable. This is + accomplished by creating a new value to hold the initial value of the array + access for the first iteration, and then creating a new GEP instruction in + the loop to increment the value by the appropriate amount. +

@@ -965,7 +1348,7 @@ if (i == j) Rotate Loops
-

Yet to be written.

+

A simple loop rotation transformation.

@@ -973,7 +1356,11 @@ if (i == j) Unroll loops
-

Yet to be written.

+

+ This pass implements a simple loop unroller. It works best when loops have + been canonicalized by the -indvars pass, + allowing it to determine the trip counts of loops easily. +

@@ -981,7 +1368,29 @@ if (i == j) Unswitch loops
-

Yet to be written.

+

+ This pass transforms loops that contain branches on loop-invariant conditions + to have multiple loops. For example, it turns the left into the right code: +

+ +
for (...)                  if (lic)
+  A                          for (...)
+  if (lic)                     A; B; C
+    B                      else
+  C                          for (...)
+                               A; C
+ +

+ This can increase the size of the code exponentially (doubling it every time + a loop is unswitched) so we only unswitch if the resultant code will be + smaller than a threshold. +

+ +

+ This pass expects LICM to be run before it to hoist invariant conditions out + of the loop, to make the unswitching opportunity obvious. +

@@ -989,15 +1398,40 @@ if (i == j) Canonicalize natural loops
-

Yet to be written.

-
- - -
- lowers packed operations to operations on smaller packed datatypes -
-
-

Yet to be written.

+

+ This pass performs several transformations to transform natural loops into a + simpler form, which makes subsequent analyses and transformations simpler and + more effective. +

+ +

+ Loop pre-header insertion guarantees that there is a single, non-critical + entry edge from outside of the loop to the loop header. This simplifies a + number of analyses and transformations, such as LICM. +

+ +

+ Loop exit-block insertion guarantees that all exit blocks from the loop + (blocks which are outside of the loop that have predecessors inside of the + loop) only have predecessors from inside of the loop (and are thus dominated + by the loop header). This simplifies transformations such as store-sinking + that are built into LICM. +

+ +

+ This pass also guarantees that loops will have exactly one backedge. +

+ +

+ Note that the simplifycfg pass will clean up blocks which are split out but + end up being unnecessary, so usage of this pass should not pessimize + generated code. +

+ +

+ This pass obviously modifies the CFG, but updates loop information and + dominator information. +

@@ -1005,15 +1439,15 @@ if (i == j) Lower allocations from instructions to calls
-

Yet to be written.

-
+

+ Turn malloc and free instructions into @malloc and + @free calls. +

- -
- Lower GC intrinsics, for GCless code generators -
-
-

Yet to be written.

+

+ This is a target-dependent transformation because it depends on the size of + data types and alignment constraints. +

@@ -1021,39 +1455,108 @@ if (i == j) Lower invoke and unwind, for unwindless code generators
-

Yet to be written.

+

+ This transformation is designed for use by code generators which do not yet + support stack unwinding. This pass supports two models of exception handling + lowering, the 'cheap' support and the 'expensive' support. +

+ +

+ 'Cheap' exception handling support gives the program the ability to execute + any program which does not "throw an exception", by turning 'invoke' + instructions into calls and by turning 'unwind' instructions into calls to + abort(). If the program does dynamically use the unwind instruction, the + program will print a message then abort. +

+ +

+ 'Expensive' exception handling support gives the full exception handling + support to the program at the cost of making the 'invoke' instruction + really expensive. It basically inserts setjmp/longjmp calls to emulate the + exception handling as necessary. +

+ +

+ Because the 'expensive' support slows down programs a lot, and EH is only + used for a subset of the programs, it must be specifically enabled by the + -enable-correct-eh-support option. +

+ +

+ Note that after this pass runs the CFG is not entirely accurate (exceptional + control flow edges are not correct anymore) so only very simple things should + be done after the lowerinvoke pass has run (like generation of native code). + This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't + support the invoke instruction yet" lowering pass. +

- Lower select instructions to branches + Lower Set Jump
-

Yet to be written.

+

+ Lowers setjmp and longjmp to use the LLVM invoke and unwind + instructions as necessary. +

+ +

+ Lowering of longjmp is fairly trivial. We replace the call with a + call to the LLVM library function __llvm_sjljeh_throw_longjmp(). + This unwinds the stack for us calling all of the destructors for + objects allocated on the stack. +

+ +

+ At a setjmp call, the basic block is split and the setjmp + removed. The calls in a function that have a setjmp are converted to + invoke where the except part checks to see if it's a longjmp + exception and, if so, if it's handled in the function. If it is, then it gets + the value returned by the longjmp and goes to where the basic block + was split. invoke instructions are handled in a similar fashion with + the original except block being executed if it isn't a longjmp + except that is handled by that function. +

- Lower Set Jump + Lower SwitchInst's to branches
-

Yet to be written.

+

+ Rewrites switch instructions with a sequence of branches, which + allows targets to get away with not implementing the switch instruction until + it is convenient. +

- Lower SwitchInst's to branches + Promote Memory to Register
-

Yet to be written.

+

+ This file promotes memory references to be register references. It promotes + alloca instructions which only have loads and + stores as uses. An alloca is transformed by using dominator + frontiers to place phi nodes, then traversing the function in + depth-first order to rewrite loads and stores as + appropriate. This is just the standard SSA construction algorithm to construct + "pruned" SSA form. +

- Promote Memory to Register + Optimize use of memcpy and friends
-

Yet to be written.

+

+ This pass performs various transformations related to eliminating memcpy + calls, or transforming sets of stores into memset's. +

@@ -1061,7 +1564,10 @@ if (i == j) Unify function exit nodes
-

Yet to be written.

+

+ Ensure that functions have at most one ret instruction in them. + Additionally, it keeps track of which node is the new exit node of the CFG. +

@@ -1069,7 +1575,21 @@ if (i == j) Predicate Simplifier
-

Yet to be written.

+

+ Path-sensitive optimizer. In a branch where x == y, replace uses of + x with y. Permits further optimization, such as the + elimination of the unreachable call: +

+ +
void test(int *p, int *q)
+{
+  if (p != q)
+    return;
+
+  if (*p != *q)
+    foo(); // unreachable
+}
@@ -1077,7 +1597,12 @@ if (i == j) Remove unused exception handling info
-

Yet to be written.

+

+ This file implements a simple interprocedural pass which walks the call-graph, + turning invoke instructions into call instructions if and + only if the callee cannot throw an exception. It implements this as a + bottom-up traversal of the call-graph. +

@@ -1085,7 +1610,10 @@ if (i == j) Raise allocations from calls to instructions
-

Yet to be written.

+

+ Converts @malloc and @free calls to malloc and + free instructions. +

@@ -1093,7 +1621,22 @@ if (i == j) Reassociate expressions
-

Yet to be written.

+

+ This pass reassociates commutative expressions in an order that is designed + to promote better constant propagation, GCSE, LICM, PRE, etc. +

+ +

+ For example: 4 + (x + 5) ⇒ x + (4 + 5) +

+ +

+ In the implementation of this algorithm, constants are assigned rank = 0, + function arguments are rank = 1, and other values are assigned ranks + corresponding to the reverse post order traversal of current function + (starting at 2), which effectively gives values in deep loops higher rank + than values not in loops. +

@@ -1101,7 +1644,16 @@ if (i == j) Demote all values to stack slots
-

Yet to be written.

+

+ This file demotes all registers to memory references. It is intended to be + the inverse of -mem2reg. By converting to + load instructions, the only values live across basic blocks are + alloca instructions and load instructions before + phi nodes. It is intended that this should make CFG hacking much + easier. To make later hacking easier, the entry block is split into two, such + that all introduced alloca instructions (and nothing else) are in the + entry block. +

@@ -1109,7 +1661,21 @@ if (i == j) Scalar Replacement of Aggregates
-

Yet to be written.

+

+ The well-known scalar replacement of aggregates transformation. This + transform breaks up alloca instructions of aggregate type (structure + or array) into individual alloca instructions for each member if + possible. Then, if possible, it transforms the individual alloca + instructions into nice clean scalar SSA form. +

+ +

+ This combines a simple scalar replacement of aggregates algorithm with the mem2reg algorithm because they often interact, + especially for C++ programs. As such, iterating between scalarrepl, + then mem2reg until we run out of things to + promote works well. +

@@ -1117,7 +1683,22 @@ if (i == j) Sparse Conditional Constant Propagation
-

Yet to be written.

+

+ Sparse conditional constant propagation and merging, which can be summarized + as: +

+ +
    +
  1. Assumes values are constant unless proven otherwise
  2. +
  3. Assumes BasicBlocks are dead unless proven otherwise
  4. +
  5. Proves values to be constant, and replaces them with constants
  6. +
  7. Proves conditional branches to be unconditional
  8. +
+ +

+ Note that this pass has a habit of making definitions be dead. It is a good + idea to run a DCE pass sometime after running this pass. +

@@ -1125,7 +1706,12 @@ if (i == j) Simplify well-known library calls
-

Yet to be written.

+

+ Applies a variety of small optimizations for calls to specific well-known + function calls (e.g. runtime library functions). For example, a call + exit(3) that occurs within the main() function can be + transformed into simply return 3. +

@@ -1133,7 +1719,18 @@ if (i == j) Simplify the CFG
-

Yet to be written.

+

+ Performs dead code elimination and basic block merging. Specifically: +

+ +
    +
  1. Removes basic blocks with no predecessors.
  2. +
  3. Merges a basic block into its predecessor if there is only one and the + predecessor only has one successor.
  4. +
  5. Eliminates PHI nodes for basic blocks with a single predecessor.
  6. +
  7. Eliminates a basic block that only contains an unconditional + branch.
  8. +
@@ -1141,7 +1738,57 @@ if (i == j) Strip all symbols from a module
-

Yet to be written.

+

+ Performs code stripping. This transformation can delete: +

+ +
    +
  1. names for virtual registers
  2. +
  3. symbols for internal globals and functions
  4. +
  5. debug information
  6. +
+ +

+ Note that this transformation makes code much less readable, so it should + only be used in situations where the strip utility would be used, + such as reducing code size or making it harder to reverse engineer code. +

+
+ + +
+ Remove unused function declarations +
+
+

+ This pass loops over all of the functions in the input module, looking for + dead declarations and removes them. Dead declarations are declarations of + functions for which no implementation is available (i.e., declarations for + unused library functions). +

+
+ + +
+ Promote sret arguments +
+
+

+ This pass finds functions that return a struct (using a pointer to the struct + as the first argument of the function, marked with the 'sret' attribute) and + replaces them with a new function that simply returns each of the elements of + that struct (using multiple return values). +

+ +

+ This pass works under a number of conditions: +

+ +
@@ -1149,7 +1796,31 @@ if (i == j) Tail Call Elimination
-

Yet to be written.

+

+ This file transforms calls of the current function (self recursion) followed + by a return instruction with a branch to the entry of the function, creating + a loop. This pass also implements the following extensions to the basic + algorithm: +

+ +
@@ -1157,7 +1828,13 @@ if (i == j) Tail Duplication
-

Yet to be written.

+

+ This pass performs a limited form of tail duplication, intended to simplify + CFGs by removing some unconditional branches. This pass is necessary to + straighten out loops created by the C front-end, but also is capable of + making other code nicer. After this pass is run, the CFG simplify pass + should be run to clean up the mess. +

@@ -1171,7 +1848,10 @@ if (i == j) Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)
-

Yet to be written.

+

+ Same as dead argument elimination, but deletes arguments to functions which + are external. This is only for use by bugpoint.

@@ -1179,15 +1859,25 @@ if (i == j) Extract Basic Blocks From Module (for bugpoint use)
-

Yet to be written.

+

+ This pass is used by bugpoint to extract all blocks from the module into their + own functions.

- Bitcode Writer + Preliminary module verification
-

Yet to be written.

+

+ Ensures that the module is in the form required by the Module Verifier pass. +

+ +

+ Running the verifier runs this pass automatically, so there should be no need + to use it directly. +

@@ -1195,7 +1885,50 @@ if (i == j) Module Verifier
-

Yet to be written.

+

+ Verifies LLVM IR code. This is useful to run after an optimization which is + undergoing testing. Note that llvm-as verifies its input before + emitting bitcode, and also that malformed bitcode is likely to make LLVM + crash. All language front-ends are therefore encouraged to verify their output + before performing optimizing transformations. +

+ + + +

+ Note that this does not provide full security verification (like Java), but + instead just tries to ensure that code is well-formed. +

@@ -1203,7 +1936,9 @@ if (i == j) View CFG of function
-

Yet to be written.

+

+ Displays the control flow graph using the GraphViz tool. +

@@ -1211,7 +1946,10 @@ if (i == j) View CFG of function (with no function bodies)
-

Yet to be written.

+

+ Displays the control flow graph using the GraphViz tool, but omitting function + bodies. +

@@ -1219,9 +1957,9 @@ if (i == j)
Valid CSS! + src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"> Valid HTML 4.01! + src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Reid Spencer
LLVM Compiler Infrastructure