1 //===- RSProfiling.cpp - Various profiling using random sampling ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // These passes implement a random sampling based profiling. Different methods
11 // of choosing when to sample are supported, as well as different types of
12 // profiling. This is done as two passes. The first is a sequence of profiling
13 // passes which insert profiling into the program, and remember what they
// inserted.
16 // The second stage duplicates all instructions in a function, ignoring the
17 // profiling code, then connects the two versions together at the entry and at
18 // backedges. At each connection point a choice is made as to whether to jump
19 // to the profiled code (take a sample) or execute the unprofiled code.
21 // It is highly recommended that after this pass one runs mem2reg and adce
22 // (instcombine load-vn gdce dse also are good to run afterwards)
24 // This design is intended to make the profiling passes independent of the RS
25 // framework, but any profiling pass that implements the RSProfiling interface
26 // is compatible with the rs framework (and thus can be sampled)
28 // TODO: obviously the block and function profiling are almost identical to the
29 // existing ones, so they can be unified (esp since these passes are valid
30 // without the rs framework).
31 // TODO: Fix choice code so that frequency is not hard coded
33 //===----------------------------------------------------------------------===//
35 #include "llvm/Pass.h"
36 #include "llvm/LLVMContext.h"
37 #include "llvm/Module.h"
38 #include "llvm/Instructions.h"
39 #include "llvm/Constants.h"
40 #include "llvm/DerivedTypes.h"
41 #include "llvm/Intrinsics.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
44 #include "llvm/Support/CommandLine.h"
45 #include "llvm/Support/Compiler.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Transforms/Instrumentation.h"
48 #include "RSProfiling.h"
// Command-line option "-profile-randomness": selects the policy used to decide
// when a sample is taken. The choices visible here are "global" (GBV),
// "ra_global" (GBVO) and "rdcc" (HOSTCC); ProfilerRS::doInitialization switches
// on this option.
60 static cl::opt<RandomMeth> RandomMethod("profile-randomness",
61 cl::desc("How to randomly choose to profile:"),
63 clEnumValN(GBV, "global", "global counter"),
64 clEnumValN(GBVO, "ra_global",
65 "register allocated global counter"),
66 clEnumValN(HOSTCC, "rdcc", "cycle counter"),
70 /// NullProfilerRS - The basic profiler that does nothing. It is the default
71 /// profiler and thus terminates RSProfiler chains. It is useful for
72 /// measuring framework overhead
73 class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
75 static char ID; // Pass identification, replacement for typeid
// isProfiling - Query asked by the sampling framework (see
// ProfilerRS::Duplicate) to decide whether a value is profiling code.
76 bool isProfiling(Value* v) {
// runOnModule - Module-level entry point of this pass.
79 bool runOnModule(Module &M) {
// getAnalysisUsage - Reports this pass's analysis usage to the pass manager.
82 void getAnalysisUsage(AnalysisUsage &AU) const {
// Register the RSProfilers analysis group under the name "Profiling passes".
88 static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
// Register the null profiler as the pass "insert-null-profiling-rs".
89 static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
90 "Measure profiling framework overhead");
// Add the null profiler to the RSProfilers group. NOTE(review): the `true`
// template argument presumably marks it as the group's default
// implementation, matching the class comment -- confirm against
// RegisterAnalysisGroup.
91 static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
94 /// Chooser - Something that chooses when to make a sample of the profiled code
// Abstract interface; the concrete policies in this file are
// GlobalRandomCounter, GlobalRandomCounterOpt and CycleCounter.
95 class VISIBILITY_HIDDEN Chooser {
97 /// ProcessChoicePoint - is called for each basic block inserted to choose
98 /// between normal and sample code
99 virtual void ProcessChoicePoint(BasicBlock*) = 0;
100 /// PrepFunction - is called once per function before other work is done.
101 /// This gives the opportunity to insert new allocas and such.
102 virtual void PrepFunction(Function*) = 0;
// Virtual destructor so implementations can be destroyed through a Chooser*.
103 virtual ~Chooser() {}
106 //Things that implement sampling policies
107 //A global value that is read-mod-stored to choose when to sample.
108 //A sample is taken when the global counter hits 0
109 class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
// The module-level counter variable (created in the constructor).
110 GlobalVariable* Counter;
// Integer type of the counter.
112 const IntegerType* T;
// M: module that receives the counter global; t: counter type;
// resetval: initial value of the counter.
114 GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
115 virtual ~GlobalRandomCounter();
116 virtual void PrepFunction(Function* F);
117 virtual void ProcessChoicePoint(BasicBlock* bb);
120 //Same as GRC, but allow register allocation of the global counter
// (PrepFunction caches the global in a per-function alloca named
// "localcounter" and the choice points operate on that local copy.)
121 class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
// The module-level counter variable (created in the constructor).
122 GlobalVariable* Counter;
// Integer type of the counter.
125 const IntegerType* T;
// M: module that receives the counter global; t: counter type;
// resetval: initial value of the counter.
127 GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
128 virtual ~GlobalRandomCounterOpt();
129 virtual void PrepFunction(Function* F);
130 virtual void ProcessChoicePoint(BasicBlock* bb);
133 //Use the cycle counter intrinsic as a source of pseudo randomness when
134 //deciding when to sample.
135 class VISIBILITY_HIDDEN CycleCounter : public Chooser {
// m: module in which the readcyclecounter intrinsic is declared;
// resetmask: mask ANDed with the cycle count at each choice point.
139 CycleCounter(Module& m, uint64_t resetmask);
140 virtual ~CycleCounter();
141 virtual void PrepFunction(Function* F);
142 virtual void ProcessChoicePoint(BasicBlock* bb);
145 /// ProfilerRS - Insert the random sampling framework
146 struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
147 static char ID; // Pass identification, replacement for typeid
148 ProfilerRS() : FunctionPass(&ID) {}
// Maps each original value to its translated (duplicated) counterpart.
150 std::map<Value*, Value*> TransCache;
// Blocks at which a sample/no-sample decision is inserted; filled while
// processing back edges and drained in runOnFunction.
151 std::set<BasicBlock*> ChoicePoints;
154 //Translate and duplicate values for the new profile free version of stuff
155 Value* Translate(Value* v);
156 //Duplicate an entire function (without profiling)
157 void Duplicate(Function& F, RSProfilers& LI);
158 //Called once for each backedge, handle the insertion of choice points and
159 //the interconnection of the two versions of the code
160 void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
// Per-function entry point of the pass.
161 bool runOnFunction(Function& F);
// Creates the Chooser selected by the -profile-randomness option.
162 bool doInitialization(Module &M);
// Declares the passes/analyses this pass requires.
163 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
// Register the sampling framework as the pass "insert-rs-profiling-framework".
167 static RegisterPass<ProfilerRS>
168 X("insert-rs-profiling-framework",
169 "Insert random sampling instrumentation framework");
// Definitions of the static pass-identification members; the pass
// constructors visible in this file pass the address of the ID
// (see ProfilerRS() : FunctionPass(&ID)).
171 char RSProfilers::ID = 0;
172 char NullProfilerRS::ID = 0;
173 char ProfilerRS::ID = 0;
// Forward declarations of the file-local helpers defined near the end of the
// file.

// In every PHI of btarget, rename incoming block bold to bnew.
176 static void ReplacePhiPred(BasicBlock* btarget,
177 BasicBlock* bold, BasicBlock* bnew);
// Merge duplicate incoming entries (same predecessor block) in btarget's PHIs.
179 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
// Recursive worker for getBackEdges. T is the container type that receives
// (source, destination) block pairs.
182 static void recBackEdge(BasicBlock* bb, T& BackEdges,
183 std::map<BasicBlock*, int>& color,
184 std::map<BasicBlock*, int>& depth,
185 std::map<BasicBlock*, int>& finish,
188 //find the back edges and where they go to
190 static void getBackEdges(Function& F, T& BackEdges);
193 ///////////////////////////////////////
194 // Methods of choosing when to profile
195 ///////////////////////////////////////
// Constructor: creates the module-level counter used to steer sampling.
//   M        - module that receives the new global variable
//   t        - integer type of the counter
//   resetval - initial value of the counter
197 GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
198 uint64_t resetval) : T(t) {
199 ConstantInt* Init = M.getContext().getConstantInt(T, resetval);
// Non-constant, internal-linkage global named "RandomSteeringCounter",
// initialised to resetval.
201 Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
202 Init, "RandomSteeringCounter");
// Destructor: nothing to do.
205 GlobalRandomCounter::~GlobalRandomCounter() {}
// PrepFunction - No per-function setup is needed for this policy; the counter
// is accessed directly through the global at every choice point.
207 void GlobalRandomCounter::PrepFunction(Function* F) {}
// ProcessChoicePoint - Insert the counter test/update at choice block bb.
// bb's terminator must be a BranchInst (the cast asserts otherwise).
209 void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
210 BranchInst* t = cast<BranchInst>(bb->getTerminator());
211 LLVMContext *Context = bb->getContext();
// Load the current counter value just before the terminator.
214 LoadInst* l = new LoadInst(Counter, "counter", t);
// Compare the loaded value against zero.
216 ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l,
217 Context->getConstantInt(T, 0),
// Decrement the counter and write it back to the global.
220 Value* nv = BinaryOperator::CreateSub(l, Context->getConstantInt(T, 1),
222 new StoreInst(nv, Counter, t);
// Interpose a "reset" block on successor 0: it stores ResetValue into the
// counter and then branches on to the old successor.
226 BasicBlock* oldnext = t->getSuccessor(0);
227 BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
229 TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
230 t->setSuccessor(0, resetblock);
231 new StoreInst(ResetValue, Counter, t2);
// oldnext is now reached from resetblock rather than bb; fix up its PHIs.
232 ReplacePhiPred(oldnext, bb, resetblock);
// Constructor: creates the module-level counter used to steer sampling.
//   M - module that receives the new global variable
//   t - integer type of the counter
235 GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
238 ConstantInt* Init = M.getContext().getConstantInt(T, resetval);
// Non-constant, internal-linkage global named "RandomSteeringCounter",
// initialised to resetval.
240 Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
241 Init, "RandomSteeringCounter");
// Destructor: nothing to do.
244 GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
// PrepFunction - Set up a function-local copy of the global counter in F and
// keep it in sync with the global around calls, invokes and function exits.
246 void GlobalRandomCounterOpt::PrepFunction(Function* F) {
247 //make a local temporary to cache the global
248 BasicBlock& bb = F->getEntryBlock();
249 BasicBlock::iterator InsertPt = bb.begin();
// Alloca, load of the global and store into the alloca are all inserted at the
// start of the entry block.
250 AI = new AllocaInst(T, 0, "localcounter", InsertPt);
251 LoadInst* l = new LoadInst(Counter, "counterload", InsertPt);
252 new StoreInst(l, AI, InsertPt);
254 //modify all functions and return values to restore the local variable to/from
255 //the global variable
256 for(Function::iterator fib = F->begin(), fie = F->end();
258 for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
260 if (isa<CallInst>(bib)) {
// Before the call: write the local copy back to the global.
261 LoadInst* l = new LoadInst(AI, "counter", bib);
262 new StoreInst(l, Counter, bib);
// After the call: reload the local copy from the global. The iterator is
// advanced past the call to insert there and then stepped back.
263 l = new LoadInst(Counter, "counter", ++bib);
264 new StoreInst(l, AI, bib--);
265 } else if (isa<InvokeInst>(bib)) {
// Before the invoke: write the local copy back to the global.
266 LoadInst* l = new LoadInst(AI, "counter", bib);
267 new StoreInst(l, Counter, bib);
// Reload at the first non-PHI instruction of the normal destination...
269 BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
270 BasicBlock::iterator i = bb->getFirstNonPHI();
271 l = new LoadInst(Counter, "counter", i);
// ...and of the unwind destination.
273 bb = cast<InvokeInst>(bib)->getUnwindDest();
274 i = bb->getFirstNonPHI();
275 l = new LoadInst(Counter, "counter", i);
276 new StoreInst(l, AI, i);
277 } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
// Leaving the function: write the local copy back to the global.
278 LoadInst* l = new LoadInst(AI, "counter", bib);
279 new StoreInst(l, Counter, bib);
// ProcessChoicePoint - Same scheme as GlobalRandomCounter::ProcessChoicePoint,
// but operating on the local alloca AI instead of the global counter.
// bb's terminator must be a BranchInst (the cast asserts otherwise).
283 void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
284 BranchInst* t = cast<BranchInst>(bb->getTerminator());
285 LLVMContext *Context = bb->getContext();
// Load the local counter just before the terminator.
288 LoadInst* l = new LoadInst(AI, "counter", t);
// Compare the loaded value against zero.
290 ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l,
291 Context->getConstantInt(T, 0),
// Decrement and write back to the local counter.
294 Value* nv = BinaryOperator::CreateSub(l, Context->getConstantInt(T, 1),
296 new StoreInst(nv, AI, t);
// Interpose a "reset" block on successor 0: it stores ResetValue into the
// local counter and then branches on to the old successor.
300 BasicBlock* oldnext = t->getSuccessor(0);
301 BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),
303 TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
304 t->setSuccessor(0, resetblock);
305 new StoreInst(ResetValue, AI, t2);
// oldnext is now reached from resetblock rather than bb; fix up its PHIs.
306 ReplacePhiPred(oldnext, bb, resetblock);
// Constructor: remembers the mask and obtains the declaration of the
// readcyclecounter intrinsic in module m.
310 CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
311 F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter);
// Destructor: nothing to do.
314 CycleCounter::~CycleCounter() {}
// PrepFunction - No per-function setup is needed for this policy.
316 void CycleCounter::PrepFunction(Function* F) {}
// ProcessChoicePoint - Insert a cycle-counter based test at choice block bb.
// bb's terminator must be a BranchInst (the cast asserts otherwise).
318 void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
319 BranchInst* t = cast<BranchInst>(bb->getTerminator());
320 LLVMContext *Context = bb->getContext();
// Call the readcyclecounter intrinsic just before the terminator.
322 CallInst* c = CallInst::Create(F, "rdcc", t);
// AND the 64-bit cycle count with the mask rm...
324 BinaryOperator::CreateAnd(c, Context->getConstantInt(Type::Int64Ty, rm),
// ...and compare the masked value against zero.
327 ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
328 Context->getConstantInt(Type::Int64Ty, 0),
334 ///////////////////////////////////////
336 ///////////////////////////////////////
// isProfiling - Decide whether v is profiling code. Values recorded in this
// pass's own profcode set are checked first; anything else is delegated to the
// RSProfilers analysis this pass requires, so profilers form a chain.
337 bool RSProfilers_std::isProfiling(Value* v) {
338 if (profcode.find(v) != profcode.end())
341 RSProfilers& LI = getAnalysis<RSProfilers>();
342 return LI.isProfiling(v);
// IncrementCounterInBlock - Insert code into BB that adds one to element
// CounterNum of CounterArray, and record every inserted instruction in
// profcode so that isProfiling() recognises it.
//   BB           - block that receives the increment
//   CounterNum   - index of the counter within the array
//   CounterArray - global holding the counters
345 void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
346 GlobalValue *CounterArray) {
347 // Insert the increment after any alloca or PHI instructions...
348 BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
349 while (isa<AllocaInst>(InsertPos))
352 // Create the getelementptr constant expression
// Indices {0, CounterNum}, both 32-bit integer constants.
353 std::vector<Constant*> Indices(2);
354 Indices[0] = Context->getNullValue(Type::Int32Ty);
355 Indices[1] = Context->getConstantInt(Type::Int32Ty, CounterNum);
356 Constant *ElementPtr = Context->getConstantExprGetElementPtr(CounterArray,
359 // Load, increment and store the value back.
360 Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
361 profcode.insert(OldVal);
362 Value *NewVal = BinaryOperator::CreateAdd(OldVal,
363 Context->getConstantInt(Type::Int32Ty, 1),
364 "NewCounter", InsertPos);
365 profcode.insert(NewVal);
366 profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
// getAnalysisUsage - Requires another RSProfilers implementation; isProfiling()
// delegates to it for values this pass did not insert.
369 void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
370 //grab any outstanding profiler, or get the null one
371 AU.addRequired<RSProfilers>();
374 ///////////////////////////////////////
376 ///////////////////////////////////////
// Translate - Return the duplicate of v in the copied version of the function,
// creating it on demand. Results are remembered in TransCache.
378 Value* ProfilerRS::Translate(Value* v) {
// NOTE(review): presumably guarded by a cache-hit check that is not visible
// here -- confirm.
380 return TransCache[v];
// Basic blocks: the entry block maps to itself; any other block gets a new,
// empty block named "dup_<name>" in the same function.
382 if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
383 if (bb == &bb->getParent()->getEntryBlock())
384 TransCache[bb] = bb; //don't translate entry block
386 TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(),
387 bb->getParent(), NULL);
388 return TransCache[bb];
389 } else if (Instruction* i = dyn_cast<Instruction>(v)) {
390 //we have already translated this
391 //do not translate entry block allocas
// True when the instruction lives in the function's entry block.
392 if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
// Other instructions are cloned and renamed "dup_<name>"; every operand of the
// clone is then translated recursively.
397 Instruction* i2 = i->clone();
399 i2->setName("dup_" + i->getName());
402 for (unsigned x = 0; x < i2->getNumOperands(); ++x)
403 i2->setOperand(x, Translate(i2->getOperand(x)));
// Functions, constants and arguments are handled by this branch.
406 } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
// Any other kind of value is unexpected.
410 assert(0 && "Value not handled");
// Duplicate - Build a second copy of F's body that omits profiling code.
//   F  - function being duplicated
//   LI - profiler chain used to recognise profiling instructions
414 void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
416 //perform a breadth first search, building up a duplicate of the code
417 std::queue<BasicBlock*> worklist;
418 std::set<BasicBlock*> seen;
420 //This loop ensures proper BB order, to help performance
// First pass: translate queued blocks so their duplicates are created up
// front.
421 for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
423 while (!worklist.empty()) {
424 Translate(worklist.front());
428 //remember that reg2mem created a new entry block we don't want to duplicate
// Start the search at the entry block's first successor and mark the entry
// block itself as already seen.
429 worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
430 seen.insert(&F.getEntryBlock());
432 while (!worklist.empty()) {
433 BasicBlock* bb = worklist.front();
435 if(seen.find(bb) == seen.end()) {
// Copy every non-profiling instruction of bb to the end of its duplicate.
436 BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
437 BasicBlock::InstListType& instlist = bbtarget->getInstList();
438 for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
441 if (!LI.isProfiling(&*iib)) {
442 Instruction* i = cast<Instruction>(Translate(iib));
443 instlist.insert(bbtarget->end(), i);
446 //updated search state;
// Visit the successors of bb that have not been seen yet.
448 TerminatorInst* ti = bb->getTerminator();
449 for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
450 BasicBlock* bbs = ti->getSuccessor(x);
451 if (seen.find(bbs) == seen.end()) {
// ProcessBackEdge - Rewire one back edge src -> dst so that both versions of
// the code pass through new "choice" blocks that can reach either version of
// dst. In the step list below, A = dst, B = src, A'/B' are their translations,
// and C/C' are the new choice blocks (bbC / bbCp).
459 void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
460 //given a backedge from B -> A, and translations A' and B',
462 //b: add branches in C to A and A' and in C' to A and A'
463 //c: mod terminators@B, replace A with C
464 //d: mod terminators@B', replace A' with C'
465 //e: mod phis@A for pred B to be pred C
466 // if multiple entries, simplify to one
467 //f: mod phis@A' for pred B' to be pred C'
468 // if multiple entries, simplify to one
469 //g: for all phis@A with pred C using x
470 // add in edge from C' using x'
471 // add in edge from C using x in A'
// Create C right after src, and C' right after the translation of src.
474 Function::iterator BBN = src; ++BBN;
475 BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN);
476 //ChoicePoints.insert(bbC);
477 BBN = cast<BasicBlock>(Translate(src));
478 BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN);
// Only C' is recorded as a choice point (the insertion of C is commented out).
479 ChoicePoints.insert(bbCp);
// C branches unconditionally to A'. C' gets a conditional branch between A and
// A' with a placeholder constant-true condition.
482 BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
483 BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),
484 Context->getConstantInt(Type::Int1Ty, true), bbCp);
// Step c: every edge src -> dst now goes to C.
487 TerminatorInst* iB = src->getTerminator();
488 for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
489 if (iB->getSuccessor(x) == dst)
490 iB->setSuccessor(x, bbC);
// Step d: every edge B' -> A' now goes to C'.
494 TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
495 for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
496 if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
497 iBp->setSuccessor(x, bbCp);
// Step e: PHIs in A name C instead of B.
500 ReplacePhiPred(dst, src, bbC);
501 //src could be a switch, in which case we are replacing several edges with one
502 //thus collapse those edges in the Phi
503 CollapsePhi(dst, bbC);
// Step f: the same fix-up for A' with B' -> C'.
505 ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
506 cast<BasicBlock>(Translate(src)),bbCp);
507 CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
// Step g: for each PHI in A with an incoming value x from C, add x' from C' to
// that PHI and add x to the translated PHI, then drop the entry for C.
509 for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
511 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
512 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
513 if(bbC == phi->getIncomingBlock(x)) {
514 phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
515 cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
518 phi->removeIncomingValue(bbC);
// runOnFunction - Apply the sampling framework to F. Only functions with a
// body are processed (declarations fail the check below).
522 bool ProfilerRS::runOnFunction(Function& F) {
523 if (!F.isDeclaration()) {
// Back edges are collected as (source, destination) block pairs.
524 std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
525 RSProfilers& LI = getAnalysis<RSProfilers>();
527 getBackEdges(F, BackEdges);
529 //assume that stuff worked. now connect the duplicated basic blocks
530 //with the originals in such a way as to preserve ssa. yuk!
531 for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
532 ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
533 ProcessBackEdge(ib->first, ib->second, F);
535 //oh, and add the edge from the reg2mem created entry node to the
536 //duplicated second node
// The entry block's terminator is replaced by a conditional branch between its
// old successor and that successor's duplicate, with a constant condition.
537 TerminatorInst* T = F.getEntryBlock().getTerminator();
538 ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
540 Translate(T->getSuccessor(0))),
541 Context->getConstantInt(Type::Int1Ty,
544 //do whatever is needed now that the function is duplicated
547 //add entry node to choice points
548 ChoicePoints.insert(&F.getEntryBlock());
// Let the selected Chooser rewrite every choice point.
550 for (std::set<BasicBlock*>::iterator
551 ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
552 c->ProcessChoicePoint(*ii);
// Reset the per-function state.
554 ChoicePoints.clear();
// doInitialization - Create the Chooser selected by the -profile-randomness
// option for module M. The reset value / mask is hard coded to (1 << 14) - 1
// for every policy (see the file-level TODO about hard-coded frequency).
562 bool ProfilerRS::doInitialization(Module &M) {
563 switch (RandomMethod) {
// Global counter, 32-bit.
565 c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
// Global counter cached in a function-local variable, 32-bit.
568 c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
// Cycle-counter based choice.
571 c = new CycleCounter(M, (1 << 14) - 1);
// getAnalysisUsage - This pass needs a profiler chain (RSProfilers) and
// requires the register-to-memory demotion pass to have run.
577 void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
578 AU.addRequired<RSProfilers>();
579 AU.addRequiredID(DemoteRegisterToMemoryID);
582 ///////////////////////////////////////
584 ///////////////////////////////////////
// ReplacePhiPred - In every PHI node of btarget, change each incoming entry
// whose block is bold so that it names bnew instead. Incoming values are left
// untouched.
585 static void ReplacePhiPred(BasicBlock* btarget,
586 BasicBlock* bold, BasicBlock* bnew) {
587 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
589 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
590 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
591 if(bold == phi->getIncomingBlock(x))
592 phi->setIncomingBlock(x, bnew);
// CollapsePhi - For every PHI node of btarget, remove repeated incoming
// entries for the same predecessor block, keeping the first one. Repeated
// entries are asserted to carry the same value. bsrc is not referenced in the
// lines visible here.
596 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
597 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
599 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
// Maps each incoming block to the value first seen for it.
600 std::map<BasicBlock*, Value*> counter;
// No increment in the loop header: the index must not advance after a removal.
601 for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
602 if (counter[phi->getIncomingBlock(i)]) {
603 assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
// NOTE(review): the `false` argument presumably keeps the PHI alive even if it
// ends up with no entries -- confirm against PHINode::removeIncomingValue.
604 phi->removeIncomingValue(i, false);
606 counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
// recBackEdge - Recursive depth-first walk over the CFG starting at bb that
// records back edges into BackEdges as (source, destination) pairs.
// NOTE(review): color presumably encodes DFS state (0 = not yet visited,
// 1 = currently on the DFS path); the lines that set it are not visible here.
614 static void recBackEdge(BasicBlock* bb, T& BackEdges,
615 std::map<BasicBlock*, int>& color,
616 std::map<BasicBlock*, int>& depth,
617 std::map<BasicBlock*, int>& finish,
623 TerminatorInst* t= bb->getTerminator();
624 for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
625 BasicBlock* bbnew = t->getSuccessor(i);
// Color 0: recurse into the successor.
626 if (color[bbnew] == 0)
627 recBackEdge(bbnew, BackEdges, color, depth, finish, time);
// Color 1: the edge bb -> bbnew is recorded as a back edge.
628 else if (color[bbnew] == 1) {
629 BackEdges.insert(std::make_pair(bb, bbnew));
640 //find the back edges and where they go to
// Fills BackEdges with (source, destination) block pairs by running
// recBackEdge from F's entry block, then prints the function name and the
// number of back edges to the debug stream.
642 static void getBackEdges(Function& F, T& BackEdges) {
// Bookkeeping maps shared across the recursion.
643 std::map<BasicBlock*, int> color;
644 std::map<BasicBlock*, int> depth;
645 std::map<BasicBlock*, int> finish;
647 recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
648 DOUT << F.getName() << " " << BackEdges.size() << "\n";
// createNullProfilerRSPass - Public factory for the do-nothing profiler pass.
// Returns a newly allocated NullProfilerRS.
653 ModulePass* llvm::createNullProfilerRSPass() {
654 return new NullProfilerRS();
// createRSProfilingPass - Public factory for the random sampling framework
// pass. Returns a newly allocated ProfilerRS.
657 FunctionPass* llvm::createRSProfilingPass() {
658 return new ProfilerRS();