// FIXME: Use common api to estimate size.
for (BasicBlock::const_iterator OI = OrigHeader->begin(),
OE = OrigHeader->end(); OI != OE; ++OI) {
- if (isa<PHINode>(OI))
- continue; // PHI nodes don't count.
- if (isa<DbgInfoIntrinsic>(OI))
- continue; // Debug intrinsics don't count as size.
- ++Size;
+ if (isa<PHINode>(OI))
+ continue; // PHI nodes don't count.
+ if (isa<DbgInfoIntrinsic>(OI))
+ continue; // Debug intrinsics don't count as size.
+ ++Size;
}
if (Size > MAX_HEADER_SIZE)
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
- // For the rest of the instructions, create a clone in the OldPreHeader.
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
- for (; I != E; ++I) {
- Instruction *C = I->clone();
- C->setName(I->getName());
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+ C->setName(Inst->getName());
C->insertBefore(LoopEntryBranch);
- ValueMap[I] = C;
+ ValueMap[Inst] = C;
}
// Along with all the other instructions, we just cloned OrigHeader's
--- /dev/null
+; RUN: opt -S -loop-rotate %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; PR5319 - The "arrayidx" gep should be hoisted, not duplicated. We should
+; end up with one phi node.
+define void @test1() nounwind ssp {
+; CHECK: @test1
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, 100
+ %arrayidx = getelementptr inbounds [20 x i32]* %array, i64 0, i64 0
+ br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK-NEXT: phi i32 [ 0
+; CHECK-NEXT: store i32 0
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %arrayidx, align 16
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+ call void @g(i32* %arrayidx.lcssa) nounwind
+ ret void
+}
+
+declare void @g(i32*)
+