1 //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
9 // This pass implements the Bottom Up SLP vectorizer. It detects consecutive
10 // stores that can be put together into vector-stores. Next, it attempts to
11 // construct vectorizable tree using the use-def chains. If a profitable tree
12 // was found, the SLP vectorizer performs vectorization on the tree.
14 // The pass is inspired by the work described in the paper:
15 // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
17 //===----------------------------------------------------------------------===//
18 #define SV_NAME "slp-vectorizer"
19 #define DEBUG_TYPE SV_NAME
22 #include "llvm/Transforms/Vectorize.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/ScalarEvolution.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/Verifier.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/raw_ostream.h"
43 SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
44 cl::desc("Only vectorize trees if the gain is above this "
45 "number. (gain = -cost of vectorization)"));
48 /// The SLPVectorizer Pass.
49 struct SLPVectorizer : public FunctionPass {
50 typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
52 /// Pass identification, replacement for typeid
55 explicit SLPVectorizer() : FunctionPass(ID) {
56 initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
61 TargetTransformInfo *TTI;
65 virtual bool runOnFunction(Function &F) {
66 SE = &getAnalysis<ScalarEvolution>();
67 DL = getAnalysisIfAvailable<DataLayout>();
68 TTI = &getAnalysis<TargetTransformInfo>();
69 AA = &getAnalysis<AliasAnalysis>();
70 LI = &getAnalysis<LoopInfo>();
75 // Must have DataLayout. We can't require it because some tests run w/o
80 for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
82 bool BBChanged = false;
84 // Use the bollom up slp vectorizer to construct chains that start with
85 // he store instructions.
86 BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
88 // Vectorize trees that end at reductions.
89 BBChanged |= vectorizeReductions(BB, R);
91 // Vectorize trees that end at stores.
92 if (unsigned count = collectStores(BB, R)) {
94 DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
95 BBChanged |= vectorizeStoreChains(R);
98 // Try to hoist some of the scalarization code to the preheader.
99 if (BBChanged) hoistGatherSequence(LI, BB, R);
101 Changed |= BBChanged;
105 DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
106 DEBUG(verifyFunction(F));
111 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
112 FunctionPass::getAnalysisUsage(AU);
113 AU.addRequired<ScalarEvolution>();
114 AU.addRequired<AliasAnalysis>();
115 AU.addRequired<TargetTransformInfo>();
116 AU.addRequired<LoopInfo>();
121 /// \brief Collect memory references and sort them according to their base
122 /// object. We sort the stores to their base objects to reduce the cost of the
123 /// quadratic search on the stores. TODO: We can further reduce this cost
124 /// if we flush the chain creation every time we run into a memory barrier.
125 unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
127 /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
128 bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
130 /// \brief Try to vectorize a list of operands.
131 bool tryToVectorizeList(BoUpSLP::ValueList &VL, BoUpSLP &R);
133 /// \brief Try to vectorize a chain that may start at the operands of \V;
134 bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
136 /// \brief Vectorize the stores that were collected in StoreRefs.
137 bool vectorizeStoreChains(BoUpSLP &R);
139 /// \brief Try to hoist gather sequences outside of the loop in cases where
140 /// all of the sources are loop invariant.
141 void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
143 /// \brief Scan the basic block and look for reductions that may start a
144 /// vectorization chain.
145 bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
148 StoreListMap StoreRefs;
151 unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
154 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
155 StoreInst *SI = dyn_cast<StoreInst>(it);
159 // Check that the pointer points to scalars.
160 if (SI->getValueOperand()->getType()->isAggregateType())
163 // Find the base of the GEP.
164 Value *Ptr = SI->getPointerOperand();
165 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
166 Ptr = GEP->getPointerOperand();
168 // Save the store locations.
169 StoreRefs[Ptr].push_back(SI);
175 bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
176 if (!A || !B) return false;
177 BoUpSLP::ValueList VL;
180 return tryToVectorizeList(VL, R);
183 bool SLPVectorizer::tryToVectorizeList(BoUpSLP::ValueList &VL, BoUpSLP &R) {
184 DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
185 int Cost = R.getTreeCost(VL);
186 int ExtrCost = R.getScalarizationCost(VL);
187 DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
188 " Cost of extract:" << ExtrCost << ".\n");
189 if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
190 DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
191 R.vectorizeArith(VL);
195 bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
196 if (!V) return false;
197 // Try to vectorize V.
198 if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
201 BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
202 BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
204 if (B && B->hasOneUse()) {
205 BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
206 BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
207 if (tryToVectorizePair(A, B0, R)) {
211 if (tryToVectorizePair(A, B1, R)) {
218 if (A && A->hasOneUse()) {
219 BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
220 BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
221 if (tryToVectorizePair(A0, B, R)) {
225 if (tryToVectorizePair(A1, B, R)) {
233 bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
234 bool Changed = false;
235 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
236 if (isa<DbgInfoIntrinsic>(it)) continue;
238 // Try to vectorize reductions that use PHINodes.
239 if (PHINode *P = dyn_cast<PHINode>(it)) {
240 // Check that the PHI is a reduction PHI.
241 if (P->getNumIncomingValues() != 2) return Changed;
242 Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
243 (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
245 // Check if this is a Binary Operator.
246 BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
250 Value *Inst = BI->getOperand(0);
251 if (Inst == P) Inst = BI->getOperand(1);
252 Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
256 // Try to vectorize trees that start at compare instructions.
257 if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
258 if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
262 for (int i = 0; i < 2; ++i)
263 if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
264 Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
272 bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
273 bool Changed = false;
274 // Attempt to sort and vectorize each of the store-groups.
275 for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
277 if (it->second.size() < 2)
280 DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
281 it->second.size() << ".\n");
283 Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
288 void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
290 // Check if this block is inside a loop.
291 Loop *L = LI->getLoopFor(BB);
295 // Check if it has a preheader.
296 BasicBlock *PreHeader = L->getLoopPreheader();
300 // Mark the insertion point for the block.
301 Instruction *Location = PreHeader->getTerminator();
303 BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
304 for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
306 InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
308 // The InsertElement sequence can be simplified into a constant.
312 // If the vector or the element that we insert into it are
313 // instructions that are defined in this basic block then we can't
314 // hoist this instruction.
315 Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
316 Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
317 if (CurrVec && L->contains(CurrVec)) continue;
318 if (NewElem && L->contains(NewElem)) continue;
320 // We can hoist this instruction. Move it to the pre-header.
321 Insert->moveBefore(Location);
325 } // end anonymous namespace
327 char SLPVectorizer::ID = 0;
328 static const char lv_name[] = "SLP Vectorizer";
329 INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
330 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
331 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
332 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
333 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
334 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
337 Pass *createSLPVectorizerPass() {
338 return new SLPVectorizer();