From 1b5c0cb71dd9d529a14cedb4bd89d544bf7e61c3 Mon Sep 17 00:00:00 2001
From: Evan Cheng <evan.cheng@apple.com>
Date: Fri, 28 Jan 2011 07:12:38 +0000
Subject: [PATCH] Revert r124462. There are a few big regressions that I need
 to fix first.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124478 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/TailDuplication.cpp               | 11 +++-----
 lib/Transforms/Utils/SimplifyCFG.cpp          |  7 +----
 test/CodeGen/X86/loop-blocks.ll               | 11 ++++----
 test/Transforms/JumpThreading/and-and-cond.ll | 10 ++-----
 test/Transforms/JumpThreading/and-cond.ll     |  9 ++----
 test/Transforms/JumpThreading/thread-loads.ll |  9 +-----
 test/Transforms/SimplifyCFG/MagicPointer.ll   |  1 +
 test/Transforms/SimplifyCFG/basictest.ll      | 10 +++++++
 test/Transforms/SimplifyCFG/switch_create.ll  | 28 +------------------
 .../SimplifyCFG/switch_formation.dbg.ll       | 14 ++--------
 10 files changed, 32 insertions(+), 78 deletions(-)
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 15aed3436c7..ce4b1be8541 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -465,12 +465,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
     MaxDuplicateCount = TailDuplicateSize;
 
   if (PreRegAlloc) {
-    if (TailBB->empty())
-      return false;
-    const TargetInstrDesc &TID = TailBB->back().getDesc();
-    // Pre-regalloc tail duplication hurts compile time and doesn't help
-    // much except for indirect branches and returns.
-    if (!TID.isIndirectBranch() && !TID.isReturn())
+    // Pre-regalloc tail duplication hurts compile time and doesn't help
+    // much except for indirect branches.
+    if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
       return false;
     // If the target has hardware branch prediction that can handle indirect
     // branches, duplicating them can often make them predictable when there
@@ -505,7 +502,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   }
   // Heuristically, don't tail-duplicate calls if it would expand code size,
   // as it's less likely to be worth the extra cost.
-  if (InstrCount > 1 && (PreRegAlloc && HasCall))
+  if (InstrCount > 1 && HasCall)
     return false;
 
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 37e6d28d7b0..f6d7d76dbf6 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -28,7 +28,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -37,10 +36,6 @@
 #include <map>
 using namespace llvm;
 
-static cl::opt<bool>
-DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
-       cl::desc("Duplicate return instructions into unconditional branches"));
-
 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
 
 namespace {
@@ -2032,7 +2027,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
   }
   
   // If we found some, do the transformation!
-  if (!UncondBranchPreds.empty() && DupRet) {
+  if (!UncondBranchPreds.empty()) {
     while (!UncondBranchPreds.empty()) {
       BasicBlock *Pred = UncondBranchPreds.pop_back_val();
       DEBUG(dbgs() << "FOLDING: " << *BB
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index faba6300712..354d0820697 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -70,7 +70,6 @@ exit:
 
 ; Same as slightly_more_involved, but block_a is now a CFG diamond with
 ; fallthrough edges which should be preserved.
-; "callq block_a_merge_func" is tail duped.
 
 ; CHECK: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
@@ -79,12 +78,12 @@ exit:
 ; CHECK-NEXT:   callq bar99
 ; CHECK-NEXT:   callq get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jle .LBB2_5
-; CHECK-NEXT:   callq block_a_false_func
-; CHECK-NEXT:   callq block_a_merge_func
-; CHECK-NEXT:   jmp .LBB2_1
-; CHECK-NEXT: .LBB2_5:
+; CHECK-NEXT:   jg .LBB2_6
 ; CHECK-NEXT:   callq block_a_true_func
+; CHECK-NEXT:   jmp .LBB2_7
+; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT:   callq block_a_false_func
+; CHECK-NEXT: .LBB2_7:
 ; CHECK-NEXT:   callq block_a_merge_func
 ; CHECK-NEXT: .LBB2_1:
 ; CHECK-NEXT:   callq body
diff --git a/test/Transforms/JumpThreading/and-and-cond.ll b/test/Transforms/JumpThreading/and-and-cond.ll
index 765d940cc7c..e6db9ee5a32 100644
--- a/test/Transforms/JumpThreading/and-and-cond.ll
+++ b/test/Transforms/JumpThreading/and-and-cond.ll
@@ -1,14 +1,14 @@
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | FileCheck %s
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | grep {ret i32 %v1}
+; There should be no unconditional branches left.
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | not grep {br label}
 
 declare i32 @f1()
 declare i32 @f2()
 declare void @f3()
 
 define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) {
-; CHECK: test
 	br i1 %cond, label %T1, label %F1
 
-; CHECK-NOT: T1:
 T1:
 	%v1 = call i32 @f1()
 	br label %Merge
@@ -18,10 +18,6 @@ F1:
 	br label %Merge
 
 Merge:
-; CHECK: Merge:
-; CHECK: %v1 = call i32 @f1()
-; CHECK-NEXT: %D = and i1 %cond2, %cond3
-; CHECK-NEXT: br i1 %D
 	%A = phi i1 [true, %T1], [false, %F1]
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	%C = and i1 %A, %cond2
diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll
index 0159bb3bb76..58dbec72a76 100644
--- a/test/Transforms/JumpThreading/and-cond.ll
+++ b/test/Transforms/JumpThreading/and-cond.ll
@@ -1,14 +1,14 @@
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | FileCheck %s
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | grep {ret i32 %v1}
+; There should be no unconditional branches left.
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | not grep {br label}
 
 declare i32 @f1()
 declare i32 @f2()
 declare void @f3()
 
 define i32 @test(i1 %cond, i1 %cond2) {
-; CHECK: test
 	br i1 %cond, label %T1, label %F1
 
-; CHECK-NOT: T1
 T1:
 	%v1 = call i32 @f1()
 	br label %Merge
@@ -18,9 +18,6 @@ F1:
 	br label %Merge
 
 Merge:
-; CHECK: Merge:
-; CHECK: %v1 = call i32 @f1()
-; CHECK-NEXT: br i1 %cond2
 	%A = phi i1 [true, %T1], [false, %F1]
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	%C = and i1 %A, %cond2
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index cce23ea319c..96ba701046d 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -jump-threading -S | FileCheck %s
+; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
 ; rdar://6402033
 
 ; Test that we can thread through the block with the partially redundant load (%2).
@@ -6,16 +6,12 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define i32 @foo(i32* %P) nounwind {
-; CHECK: foo
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
 	br i1 %1, label %bb1, label %bb
 
 bb:		; preds = %entry
-; CHECK: bb1.thread:
-; CHECK: store
-; CHECK: br label %bb3
 	store i32 42, i32* %P, align 4
 	br label %bb1
 
@@ -30,9 +26,6 @@ bb2:		; preds = %bb1
 	ret i32 %res.0
 
 bb3:		; preds = %bb1
-; CHECK: bb3:
-; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
-; CHECK: ret i32 %res.01
 	ret i32 %res.0
 }
 
diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll
index 93b9a276eac..54e5b14880c 100644
--- a/test/Transforms/SimplifyCFG/MagicPointer.ll
+++ b/test/Transforms/SimplifyCFG/MagicPointer.ll
@@ -8,6 +8,7 @@
 ; CHECK: i64 2, label
 ; CHECK: i64 3, label
 ; CHECK: i64 4, label
+; CHECK-NOT: br
 ; CHECK: }
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 052e10667da..b485f6ab05f 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -25,6 +25,16 @@ define void @test3(i1 %T) {
 }
 
 
+define void @test4() {
+  br label %return
+return:
+  ret void
+; CHECK: @test4
+; CHECK-NEXT: ret void
+}
+@test4g = global i8* blockaddress(@test4, %return)
+
+
 ; PR5795
 define void @test5(i32 %A) {
   switch i32 %A, label %return [
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 4e199bc8596..da7f65a6ca3 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -147,7 +147,7 @@ UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
 ; CHECK:       i32 16, label %UnifiedReturnBlock
 ; CHECK:       i32 17, label %UnifiedReturnBlock
 ; CHECK:       i32 18, label %UnifiedReturnBlock
-; CHECK:       i32 19, label %UnifiedReturnBlock
+; CHECK:       i32 19, label %switch.edge
 ; CHECK:     ]
 }
 
@@ -441,29 +441,3 @@ if.end:
 ; CHECK-NOT: switch
 ; CHECK: ret void
 }
-
-; PR8675
-; rdar://5134905
-define zeroext i1 @test16(i32 %x) nounwind {
-entry:
-; CHECK: @test16
-; CHECK: switch i32 %x, label %lor.rhs [
-; CHECK:   i32 1, label %lor.end
-; CHECK:   i32 2, label %lor.end
-; CHECK:   i32 3, label %lor.end
-; CHECK: ]
-  %cmp.i = icmp eq i32 %x, 1
-  br i1 %cmp.i, label %lor.end, label %lor.lhs.false
-
-lor.lhs.false:
-  %cmp.i2 = icmp eq i32 %x, 2
-  br i1 %cmp.i2, label %lor.end, label %lor.rhs
-
-lor.rhs:
-  %cmp.i1 = icmp eq i32 %x, 3
-  br label %lor.end
-
-lor.end:
-  %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
-  ret i1 %0
-}
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
index 357ffb60e1e..f1c820ec43b 100644
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -simplifycfg -S | not grep br
+
 
         %llvm.dbg.anchor.type = type { i32, i32 }
         %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
@@ -12,16 +13,7 @@
 
 declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
 
-define i1 @t({ i32, i32 }* %I) {
-; CHECK: t
-; CHECK:  switch i32 %tmp.2.i, label %shortcirc_next.4 [
-; CHECK:    i32 14, label %UnifiedReturnBlock
-; CHECK:    i32 15, label %UnifiedReturnBlock
-; CHECK:    i32 16, label %UnifiedReturnBlock
-; CHECK:    i32 17, label %UnifiedReturnBlock
-; CHECK:    i32 18, label %UnifiedReturnBlock
-; CHECK:    i32 19, label %UnifiedReturnBlock
-; CHECK:  ]
+define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
 entry:
         %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
         %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
-- 
2.34.1