[Hexagon] Generate hardware loop when loop has a critical edge

author Brendon Cahoon <bcahoon@codeaurora.org>

Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)

committer Brendon Cahoon <bcahoon@codeaurora.org>

Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)
author Brendon Cahoon <bcahoon@codeaurora.org>
Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)
committer Brendon Cahoon <bcahoon@codeaurora.org>
Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp

index 0cad3d04c142287b334076fa1b6a33a26bc64df5..aebfe26154d6687b115acf1c13d2bd900c88bd74 100644 (file)
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -555,7 +555,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
  
    SmallVector<MachineOperand,2> Cond;
    MachineBasicBlock *TB = nullptr, *FB = nullptr;
-  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
    if (NotAnalyzed)
      return nullptr;
  
@@ -563,7 +563,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
    // TB must be non-null.  If FB is also non-null, one of them must be
    // the header.  Otherwise, branch to TB could be exiting the loop, and
    // the fall through can go to the header.
-  assert (TB && "Latch block without a branch?");
+  assert (TB && "Exit block without a branch?");
    if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
      MachineBasicBlock *LTB = 0, *LFB = 0;
      SmallVector<MachineOperand,2> LCond;
@@ -571,9 +571,9 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
      if (NotAnalyzed)
        return nullptr;
      if (TB == Latch)
-      (LTB == Header) ? TB = LTB: TB = LFB;
-    else // FB == Latch
-      (LTB == Header) ? FB = LTB: FB = LFB;
+      TB = (LTB == Header) ? LTB : LFB;
+    else
+      FB = (LTB == Header) ? LTB: LFB;
    }
    assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
    if (!TB || (FB && TB != Header && FB != Header))
@@ -1347,17 +1347,38 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
    MachineBasicBlock *TB = nullptr, *FB = nullptr;
    SmallVector<MachineOperand,2> Cond;
    // AnalyzeBranch returns true if it fails to analyze branch.
-  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
-  if (NotAnalyzed)
+  bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false);
+  if (NotAnalyzed || Cond.empty())
      return false;
  
-  // Check if the latch branch is unconditional.
-  if (Cond.empty())
-    return false;
+  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
+    MachineBasicBlock *LTB = 0, *LFB = 0;
+    SmallVector<MachineOperand,2> LCond;
+    bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false);
+    if (NotAnalyzed)
+      return false;
  
-  if (TB != Header && FB != Header)
-    // The latch does not go back to the header.  Not a latch we know and love.
-    return false;
+    // Since latch is not the exiting block, the latch branch should be an
+    // unconditional branch to the loop header.
+    if (TB == Latch)
+      TB = (LTB == Header) ? LTB : LFB;
+    else
+      FB = (LTB == Header) ? LTB : LFB;
+  }
+  if (TB != Header) {
+    if (FB != Header) {
+      // The latch/exit block does not go back to the header.
+      return false;
+    }
+    // FB is the header (i.e., an unconditional jump to the loop header).
+    // In this case, the LoopBody -> TB should not be a back edge; otherwise
+    // it could result in an infinite loop after conversion to hw_loop.
+    // This case can happen when the Latch has two jumps like this:
+    // Jmp_c OuterLoopHeader <-- TB
+    // Jmp   InnerLoopHeader <-- FB
+    if (MDT->dominates(TB, FB))
+      return false;
+  }
  
    // Expecting a predicate register as a condition.  It won't be a hardware
    // predicate register at this point yet, just a vreg.
@@ -1368,6 +1389,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
    if (CSz != 1 && CSz != 2)
      return false;
  
+  if (!Cond[CSz-1].isReg())
+    return false;
+
    unsigned P = Cond[CSz-1].getReg();
    MachineInstr *PredDef = MRI->getVRegDef(P);
  
diff --git a/test/CodeGen/Hexagon/hwloop-crit-edge.ll b/test/CodeGen/Hexagon/hwloop-crit-edge.ll

new file mode 100644 (file)

index 0000000..4de4540
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-crit-edge.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Generate hardware loop when loop 'latch' block is different
+; from the loop 'exiting' block.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define void @test(i32* nocapture %pFL, i16 signext %nBS, i16* nocapture readonly %pHT) #0 {
+entry:
+  %0 = load i32, i32* %pFL, align 4
+  %1 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %0, i32 246)
+  %2 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %1, i32 -13)
+  %3 = tail call i32 @llvm.hexagon.A2.sat(i64 %2)
+  store i32 %3, i32* %pFL, align 4
+  %cmp16 = icmp sgt i16 %nBS, 0
+  br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %4 = sext i16 %nBS to i32
+  br label %for.body
+
+for.body:
+  %5 = phi i32 [ %3, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
+  %arrayidx3.phi = phi i32* [ %pFL, %for.body.lr.ph ], [ %arrayidx3.inc, %for.body.for.body_crit_edge ]
+  %arrayidx5.phi = phi i16* [ %pHT, %for.body.lr.ph ], [ %arrayidx5.inc, %for.body.for.body_crit_edge ]
+  %i.017.pmt = phi i32 [ 1, %for.body.lr.ph ], [ %phitmp, %for.body.for.body_crit_edge ]
+  %6 = load i16, i16* %arrayidx5.phi, align 2
+  %conv6 = sext i16 %6 to i32
+  %7 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %5, i32 %conv6)
+  %8 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %7, i32 -13)
+  %9 = tail call i32 @llvm.hexagon.A2.sat(i64 %8)
+  store i32 %9, i32* %arrayidx3.phi, align 4
+  %exitcond = icmp eq i32 %i.017.pmt, %4
+  %arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1
+  br i1 %exitcond, label %for.end.loopexit, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:
+  %arrayidx5.inc = getelementptr i16, i16* %arrayidx5.phi, i32 1
+  %.pre = load i32, i32* %arrayidx3.inc, align 4
+  %phitmp = add i32 %i.017.pmt, 1
+  br label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+declare i32 @llvm.hexagon.A2.sat(i64) #1
+
+declare i64 @llvm.hexagon.S2.asl.r.p(i64, i32) #1
+
+declare i64 @llvm.hexagon.M2.dpmpyss.s0(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
author	Brendon Cahoon <bcahoon@codeaurora.org>
	Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)
committer	Brendon Cahoon <bcahoon@codeaurora.org>
	Wed, 13 May 2015 14:54:24 +0000 (14:54 +0000)
lib/Target/Hexagon/HexagonHardwareLoops.cpp		patch \| blob \| history
test/CodeGen/Hexagon/hwloop-crit-edge.ll	[new file with mode: 0644]	patch \| blob