// Widen each instruction in the old loop to a new one in the new loop.
// Use the Legality module to find the induction and reduction variables.
vectorizeLoop();
- // Register the new loop and update the analysis passes.
- updateAnalysis();
}
// Return true if any runtime check is added.
PHINode *OldInduction;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
+ /// Store instructions that should be predicated, recorded as
+ /// <StoreInst, Predicate> pairs.
+ SmallVector<std::pair<StoreInst*,Value*>, 4> PredicatedStores;
EdgeMaskCache MaskCache;
/// Trip count of the original loop.
Value *TripCount;
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
if (IfPredicateStore) {
Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
}
Instruction *Cloned = Instr->clone();
VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
Builder.getInt32(Width));
// End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
}
fixLCSSAPHIs();
+ // Make sure DomTree is updated before the block splitting below, which is
+ // handed DT.
+ updateAnalysis();
+
+ // Predicate any stores: give each recorded store its own conditional block.
+ for (auto KV : PredicatedStores) { // KV is a <StoreInst*, predicate Value*> pair
+ BasicBlock::iterator I(KV.first);
+ auto *BB = SplitBlock(I->getParent(), std::next(I), DT, LI); // split right after the store
+ auto *T = SplitBlockAndInsertIfThen(KV.second, I, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DT);
+ I->moveBefore(T); // NOTE(review): T is presumably the new "then" block's terminator - confirm
+ I->getParent()->setName("pred.store.if"); // the block that now holds the store
+ BB->setName("pred.store.continue"); // the block produced by the first split
+ }
+ DEBUG(DT->verifyDomTree());
// Remove redundant induction instructions.
cse(LoopVectorBody);
}
DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
- // Due to if predication of stores we might create a sequence of "if(pred)
- // a[i] = ...; " blocks.
- for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
- if (i == 0)
- DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
- else if (isPredicatedBlock(i)) {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]);
- } else {
- DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]);
- }
- }
+ // We don't predicate stores by this point, so the vector body should be a
+ // single basic block.
+ assert(LoopVectorBody.size() == 1 && "Expected single block loop!");
+ DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
DT->addNewBlock(LoopMiddleBlock, LoopVectorBody.back());
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- Instruction *InsertPt = Builder.GetInsertPoint();
- BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = nullptr;
-
VectorParts Cond;
- Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
Instr->getParent());
- VectorLp = LI->getLoopFor(IfBlock);
- assert(VectorLp && "Must have a loop for this block");
}
// For each vector unroll 'part':
Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part],
ConstantInt::get(Cond[Part]->getType(), 1));
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, *LI);
- // Update Builder with newly created basic block.
- Builder.SetInsertPoint(InsertPt);
}
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
VecResults[Part] = Cloned;
- // End if-block.
- if (IfPredicateStore) {
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
- Builder.SetInsertPoint(InsertPt);
- ReplaceInstWithInst(IfBlock->getTerminator(),
- BranchInst::Create(CondBlock, NewIfBlock, Cmp));
- IfBlock = NewIfBlock;
- }
+ // Remember the store and its predicate; no if-block is created here.
+ if (IfPredicateStore)
+ PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+ Cmp));
}
}
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
+; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
; VEC: [[cond]]:
-; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
-; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: store i32 %[[v13]], i32* %[[v14]], align 4
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
+; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
; VEC: [[cond2]]:
-; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
-; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: store i32 %[[v17]], i32* %[[v18]], align 4
; VEC: br label %[[else2:.+]]
;
; VEC: [[else2]]:
+; VEC-IC-LABEL: test
+; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
+; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
+; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0
+; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]
+;
+; VEC-IC: [[cond]]:
+; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0
+; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else:.+]]
+;
+; VEC-IC: [[else]]:
+; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1
+; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]
+;
+; VEC-IC: [[cond2]]:
+; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1
+; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else2:.+]]
+;
+; VEC-IC: [[else2]]:
+
; UNROLL-LABEL: test
; UNROLL: vector.body:
; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
; vectorized loop body.
; PR18724
-; UNROLL-LABEL: bug18724
-; UNROLL: store i32
-; UNROLL: store i32
+; UNROLL-NOSIMPLIFY-LABEL: bug18724
+; UNROLL-NOSIMPLIFY: store i32
+; UNROLL-NOSIMPLIFY: store i32
define void @bug18724() {
entry: