From 8624519c0c8a2c9f2a2eaac4e33ad9656842c5f9 Mon Sep 17 00:00:00 2001 From: Erik Eckstein Date: Sat, 2 Aug 2014 19:39:42 +0000 Subject: [PATCH] fix bug 20513 - Crash in SLP Vectorizer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 24 ++++++---- .../SLPVectorizer/X86/crash_scheduling.ll | 47 +++++++++++++++++++ 2 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/X86/crash_scheduling.ll diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index cb9def22542..c91ca280033 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -806,7 +806,7 @@ private: /// Performs the "real" scheduling. Done before vectorization is actually /// performed in a basic block. - void scheduleBlock(BasicBlock *BB); + void scheduleBlock(BlockScheduling *BS); /// List of users to ignore during scheduling and that don't need extracting. ArrayRef UserIgnoreList; @@ -1741,8 +1741,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } - BasicBlock *BB = VL0->getParent(); - scheduleBlock(BB); unsigned Opcode = getSameOpcode(E->Scalars); @@ -2076,6 +2074,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *BoUpSLP::vectorizeTree() { + + // All blocks must be scheduled before any instructions are inserted. + for (auto &BSIter : BlocksSchedules) { + scheduleBlock(BSIter.second.get()); + } + Builder.SetInsertPoint(F->getEntryBlock().begin()); vectorizeTree(&VectorizableTree[0]); @@ -2548,12 +2552,12 @@ void BoUpSLP::BlockScheduling::resetSchedule() { ReadyInsts.clear(); } -void BoUpSLP::scheduleBlock(BasicBlock *BB) { - DEBUG(dbgs() << "SLP: schedule block " << BB->getName() << "\n"); - - BlockScheduling *BS = BlocksSchedules[BB].get(); - if (!BS || !BS->ScheduleStart) +void BoUpSLP::scheduleBlock(BlockScheduling *BS) { + + if (!BS->ScheduleStart) return; + + DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); BS->resetSchedule(); @@ -2598,8 +2602,8 @@ void BoUpSLP::scheduleBlock(BasicBlock *BB) { while (BundleMember) { Instruction *pickedInst = BundleMember->Inst; if (LastScheduledInst->getNextNode() != pickedInst) { - BB->getInstList().remove(pickedInst); - BB->getInstList().insert(LastScheduledInst, pickedInst); + BS->BB->getInstList().remove(pickedInst); + BS->BB->getInstList().insert(LastScheduledInst, pickedInst); } LastScheduledInst = pickedInst; BundleMember = BundleMember->NextInBundle; diff --git a/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll new file mode 100644 index 00000000000..dddc1be1c1a --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin13.3.0" + +define void @_foo(double %p1, double %p2, double %p3) #0 { +entry: + %tab1 = alloca [256 x i32], align 16 + %tab2 = alloca [256 x i32], align 16 + br label %bb1 + + +bb1: + %mul19 = fmul double %p1, 1.638400e+04 + %mul20 = fmul double %p3, 1.638400e+04 + %add = fadd double %mul20, 8.192000e+03 + %mul21 = fmul double %p2, 1.638400e+04 + ; The SLPVectorizer crashed when scheduling this block after it inserted an + ; insertelement instruction (during vectorizing the for.body block) at this position. + br label %for.body + +for.body: + %indvars.iv266 = phi i64 [ 0, %bb1 ], [ %indvars.iv.next267, %for.body ] + %t.0259 = phi double [ 0.000000e+00, %bb1 ], [ %add27, %for.body ] + %p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ] + %vecinit.i.i237 = insertelement <2 x double> undef, double %t.0259, i32 0 + %x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2 + %arrayidx = getelementptr inbounds [256 x i32]* %tab1, i64 0, i64 %indvars.iv266 + store i32 %x13, i32* %arrayidx, align 4, !tbaa !4 + %vecinit.i.i = insertelement <2 x double> undef, double %p3.addr.0258, i32 0 + %x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2 + %arrayidx26 = getelementptr inbounds [256 x i32]* %tab2, i64 0, i64 %indvars.iv266 + store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4 + %add27 = fadd double %mul19, %t.0259 + %add28 = fadd double %mul21, %p3.addr.0258 + %indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1 + %exitcond = icmp eq i64 %indvars.iv.next267, 256 + br i1 %exitcond, label %return, label %for.body + +return: + ret void +} + +declare i32 @_xfn(<2 x double>) #4 + +!3 = metadata !{metadata !"int", metadata !4, i64 0} +!4 = metadata !{metadata !3, metadata !3, i64 0} -- 2.34.1