From e55e117ad3fbebe7860b7023aac55c2a40c952f7 Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Wed, 13 May 2015 14:54:24 +0000 Subject: [PATCH] [Hexagon] Generate hardware loop when loop has a critical edge The hardware loop pass should try to generate a hardware loop instruction when the original loop has a critical edge. Differential Revision: http://reviews.llvm.org/D9678 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 50 +++++++++++++----- test/CodeGen/Hexagon/hwloop-crit-edge.ll | 58 +++++++++++++++++++++ 2 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/Hexagon/hwloop-crit-edge.ll diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 0cad3d04c14..aebfe26154d 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -555,7 +555,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, SmallVector Cond; MachineBasicBlock *TB = nullptr, *FB = nullptr; - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); if (NotAnalyzed) return nullptr; @@ -563,7 +563,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // TB must be non-null. If FB is also non-null, one of them must be // the header. Otherwise, branch to TB could be exiting the loop, and // the fall through can go to the header. - assert (TB && "Latch block without a branch?"); + assert (TB && "Exit block without a branch?"); if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { MachineBasicBlock *LTB = 0, *LFB = 0; SmallVector LCond; @@ -571,9 +571,9 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, if (NotAnalyzed) return nullptr; if (TB == Latch) - (LTB == Header) ? 
TB = LTB: TB = LFB; - else // FB == Latch - (LTB == Header) ? FB = LTB: FB = LFB; + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB: LFB; } assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); if (!TB || (FB && TB != Header && FB != Header)) @@ -1347,17 +1347,38 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *TB = nullptr, *FB = nullptr; SmallVector Cond; // AnalyzeBranch returns true if it fails to analyze branch. - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); - if (NotAnalyzed) + bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + if (NotAnalyzed || Cond.empty()) return false; - // Check if the latch branch is unconditional. - if (Cond.empty()) - return false; + if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { + MachineBasicBlock *LTB = 0, *LFB = 0; + SmallVector LCond; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + if (NotAnalyzed) + return false; - if (TB != Header && FB != Header) - // The latch does not go back to the header. Not a latch we know and love. - return false; + // Since latch is not the exiting block, the latch branch should be an + // unconditional branch to the loop header. + if (TB == Latch) + TB = (LTB == Header) ? LTB : LFB; + else + FB = (LTB == Header) ? LTB : LFB; + } + if (TB != Header) { + if (FB != Header) { + // The latch/exit block does not go back to the header. + return false; + } + // FB is the header (i.e., the unconditional jump goes to the loop header). + // In this case, the LoopBody -> TB should not be a back edge otherwise + // it could result in an infinite loop after conversion to hw_loop. + // This case can happen when the Latch has two jumps like this: + // Jmp_c OuterLoopHeader <-- TB + // Jmp InnerLoopHeader <-- FB + if (MDT->dominates(TB, FB)) + return false; + } // Expecting a predicate register as a condition. 
It won't be a hardware // predicate register at this point yet, just a vreg. @@ -1368,6 +1389,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (CSz != 1 && CSz != 2) return false; + if (!Cond[CSz-1].isReg()) + return false; + unsigned P = Cond[CSz-1].getReg(); MachineInstr *PredDef = MRI->getVRegDef(P); diff --git a/test/CodeGen/Hexagon/hwloop-crit-edge.ll b/test/CodeGen/Hexagon/hwloop-crit-edge.ll new file mode 100644 index 00000000000..4de4540c142 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-crit-edge.ll @@ -0,0 +1,58 @@ +; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; +; Generate hardware loop when loop 'latch' block is different +; from the loop 'exiting' block. + +; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}}) +; CHECK: endloop0 + +define void @test(i32* nocapture %pFL, i16 signext %nBS, i16* nocapture readonly %pHT) #0 { +entry: + %0 = load i32, i32* %pFL, align 4 + %1 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %0, i32 246) + %2 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %1, i32 -13) + %3 = tail call i32 @llvm.hexagon.A2.sat(i64 %2) + store i32 %3, i32* %pFL, align 4 + %cmp16 = icmp sgt i16 %nBS, 0 + br i1 %cmp16, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %4 = sext i16 %nBS to i32 + br label %for.body + +for.body: + %5 = phi i32 [ %3, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ] + %arrayidx3.phi = phi i32* [ %pFL, %for.body.lr.ph ], [ %arrayidx3.inc, %for.body.for.body_crit_edge ] + %arrayidx5.phi = phi i16* [ %pHT, %for.body.lr.ph ], [ %arrayidx5.inc, %for.body.for.body_crit_edge ] + %i.017.pmt = phi i32 [ 1, %for.body.lr.ph ], [ %phitmp, %for.body.for.body_crit_edge ] + %6 = load i16, i16* %arrayidx5.phi, align 2 + %conv6 = sext i16 %6 to i32 + %7 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %5, i32 %conv6) + %8 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %7, i32 -13) + %9 = tail call i32 @llvm.hexagon.A2.sat(i64 %8) + store i32 %9, i32* %arrayidx3.phi, align 4 + 
%exitcond = icmp eq i32 %i.017.pmt, %4 + %arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1 + br i1 %exitcond, label %for.end.loopexit, label %for.body.for.body_crit_edge + +for.body.for.body_crit_edge: + %arrayidx5.inc = getelementptr i16, i16* %arrayidx5.phi, i32 1 + %.pre = load i32, i32* %arrayidx3.inc, align 4 + %phitmp = add i32 %i.017.pmt, 1 + br label %for.body + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +declare i32 @llvm.hexagon.A2.sat(i64) #1 + +declare i64 @llvm.hexagon.S2.asl.r.p(i64, i32) #1 + +declare i64 @llvm.hexagon.M2.dpmpyss.s0(i32, i32) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } -- 2.34.1