1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "bypass-slow-division"
19 #include "llvm/Instructions.h"
20 #include "llvm/Function.h"
21 #include "llvm/IRBuilder.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
33 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
41 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42 : Quotient(InQuotient), Remainder(InRemainder) {}
46 struct DenseMapInfo<DivOpInfo> {
47 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
48 return Val1.SignedOp == Val2.SignedOp &&
49 Val1.Dividend == Val2.Dividend &&
50 Val1.Divisor == Val2.Divisor;
53 static DivOpInfo getEmptyKey() {
54 return DivOpInfo(false, 0, 0);
57 static DivOpInfo getTombstoneKey() {
58 return DivOpInfo(true, 0, 0);
61 static unsigned getHashValue(const DivOpInfo &Val) {
62 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
63 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
64 (unsigned)Val.SignedOp;
68 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
71 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
72 // value of the operands and uses a shorter-faster div/rem instruction when
73 // possible and the longer-slower div/rem instruction otherwise.
74 static void insertFastDiv(Function &F,
75 Function::iterator &I,
76 BasicBlock::iterator &J,
77 IntegerType *BypassType,
80 DivCacheTy &PerBBDivCache)
82 // Get instruction operands
83 Instruction *Instr = J;
84 Value *Dividend = Instr->getOperand(0);
85 Value *Divisor = Instr->getOperand(1);
87 if (dyn_cast<ConstantInt>(Divisor) != 0 ||
88 (dyn_cast<ConstantInt>(Dividend) != 0 &&
89 dyn_cast<ConstantInt>(Divisor) != 0)) {
90 // Operations with immediate values should have
91 // been solved and replaced during compile time.
95 // Basic Block is split before divide
96 BasicBlock *MainBB = I;
97 BasicBlock *SuccessorBB = I->splitBasicBlock(J);
98 I++; //advance iterator I to successorBB
100 // Add new basic block for slow divide operation
101 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
102 MainBB->getParent(), SuccessorBB);
103 SlowBB->moveBefore(SuccessorBB);
104 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
105 Value *SlowQuotientV;
106 Value *SlowRemainderV;
108 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
109 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
112 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114 SlowBuilder.CreateBr(SuccessorBB);
116 // Add new basic block for fast divide operation
117 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
118 MainBB->getParent(), SuccessorBB);
119 FastBB->moveBefore(SlowBB);
120 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
121 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, BypassType);
122 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, BypassType);
124 // udiv/urem because optimization only handles positive numbers
125 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
127 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
129 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
131 Dividend->getType());
132 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
134 Dividend->getType());
135 FastBuilder.CreateBr(SuccessorBB);
137 // Phi nodes for result of div and rem
138 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
139 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
140 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
141 QuoPhi->addIncoming(FastQuotientV, FastBB);
142 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143 RemPhi->addIncoming(SlowRemainderV, SlowBB);
144 RemPhi->addIncoming(FastRemainderV, FastBB);
146 // Replace Instr with appropriate phi node
148 Instr->replaceAllUsesWith(QuoPhi);
150 Instr->replaceAllUsesWith(RemPhi);
152 Instr->eraseFromParent();
154 // Combine operands into a single value with OR for value testing below
155 MainBB->getInstList().back().eraseFromParent();
156 IRBuilder<> MainBuilder(MainBB, MainBB->end());
157 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
159 // BitMask is inverted to check if the operands are
160 // larger than the bypass type
161 uint64_t BitMask = ~BypassType->getBitMask();
162 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
164 // Compare operand values and branch
165 Value *ZeroV = MainBuilder.getInt32(0);
166 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
167 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
169 // point iterator J at first instruction of successorBB
172 // Cache phi nodes to be used later in place of other instances
173 // of div or rem with the same sign, dividend, and divisor
174 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
175 DivPhiNodes Value(QuoPhi, RemPhi);
176 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
179 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
180 // operands and operation are identical. Otherwise call insertFastDiv to perform
181 // the optimization and cache the resulting dividend and remainder.
182 static void reuseOrInsertFastDiv(Function &F,
183 Function::iterator &I,
184 BasicBlock::iterator &J,
185 IntegerType *BypassType,
188 DivCacheTy &PerBBDivCache)
190 // Get instruction operands
191 Instruction *Instr = J;
192 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
193 DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key);
195 if (CacheI == PerBBDivCache.end()) {
196 // If previous instance does not exist, insert fast div
197 insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
201 // Replace operation value with previously generated phi node
202 DivPhiNodes Value = CacheI->second;
204 // Replace all uses of div instruction with quotient phi node
205 J->replaceAllUsesWith(Value.Quotient);
207 // Replace all uses of rem instruction with remainder phi node
208 J->replaceAllUsesWith(Value.Remainder);
211 // Advance to next operation
214 // Remove redundant operation
215 Instr->eraseFromParent();
218 // bypassSlowDivision - This optimization identifies DIV instructions that can
219 // be profitably bypassed and carried out with a shorter, faster divide.
220 bool bypassSlowDivision(Function &F,
221 Function::iterator &I,
222 const llvm::DenseMap<Type *, Type *> &BypassTypeMap)
226 bool MadeChange = false;
227 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
229 // Get instruction details
230 unsigned Opcode = J->getOpcode();
231 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
232 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
233 bool UseSignedOp = Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
235 // Only optimize div or rem ops
236 if (!UseDivOp && !UseRemOp) {
239 // Continue if div/rem type is not bypassed
240 DenseMap<Type *, Type *>::const_iterator BT = BypassTypeMap.find(J->getType());
241 if (BT == BypassTypeMap.end()) {
245 IntegerType *BypassType = (IntegerType *)BT->second;
246 reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, DivCache);