Enable code placement optimization pass for ARM.

author Evan Cheng <evan.cheng@apple.com>

Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index ae1fead462de3026812c3369aecee91054566ecd..4c1e7af814ff1b7ac6e13afc0a0b72f226d85578 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -76,11 +76,6 @@ ARMInterworking("arm-interworking", cl::Hidden,
    cl::desc("Enable / disable ARM interworking (for debugging only)"),
    cl::init(true));
  
-static cl::opt<bool>
-EnableARMCodePlacement("arm-code-placement", cl::Hidden,
-  cl::desc("Enable code placement pass for ARM"),
-  cl::init(false));
-
  void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                         EVT PromotedBitwiseVT) {
    if (VT != PromotedLdStVT) {
@@ -550,8 +545,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    // are at least 4 bytes aligned.
    setMinStackArgumentAlignment(4);
  
-  if (EnableARMCodePlacement)
-    benefitFromCodePlacementOpt = true;
+  benefitFromCodePlacementOpt = true;
  }
  
  std::pair<const TargetRegisterClass*, uint8_t>
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll

index 8d42a794feee0c1c1af985b758de63031b1fd1e8..9085b6804c0eef805e721c8e8a99654284da3def 100644 (file)
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -18,13 +18,13 @@ tailrecurse:                                      ; preds = %sw.bb, %entry
    %0 = ptrtoint i8* %tmp2 to i32
  
  ; CHECK:      ands r12, r12, #3
-; CHECK-NEXT: beq LBB0_4
+; CHECK-NEXT: beq LBB0_2
  
  ; T2:      movs r5, #3
  ; T2-NEXT: mov r6, r4
  ; T2-NEXT: ands r6, r5
  ; T2-NEXT: tst r4, r5
-; T2-NEXT: beq LBB0_5
+; T2-NEXT: beq LBB0_3
  
    %and = and i32 %0, 3
    %tst = icmp eq i32 %and, 0
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll

index 25c556889fc448918ab25586c2c576ae4ac7bd88..bcf29ed9b46f16037f6cda0917d7f2c08cb24a30 100644 (file)
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -5,8 +5,9 @@
  %struct.list_data_s = type { i16, i16 }
  %struct.list_head = type { %struct.list_head*, %struct.list_data_s* }
  
-define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind {
+define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind {
  entry:
+; CHECK: t1:
    %0 = icmp eq %struct.list_head* %list, null
    br i1 %0, label %bb2, label %bb
  
@@ -27,3 +28,51 @@ bb2:
    %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ]
    ret %struct.list_head* %next.0.lcssa
  }
+
+; Optimize loop entry, eliminate intra loop branches
+; rdar://8117827
+define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
+entry:
+; CHECK: t2:
+; CHECK: beq LBB1_5
+  %0 = icmp eq i32 %passes, 0                     ; <i1> [#uses=1]
+  br i1 %0, label %bb5, label %bb.nph15
+
+; CHECK: LBB1_2
+bb1:                                              ; preds = %bb2.preheader, %bb1
+; CHECK: LBB1_3:
+; CHECK: bne LBB1_3
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
+  %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
+  %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
+  %scevgep = getelementptr i32* %src, i32 %tmp17  ; <i32*> [#uses=1]
+  %1 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, %sum.08                    ; <i32> [#uses=2]
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %size     ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb3, label %bb1
+
+bb3:                                              ; preds = %bb1, %bb2.preheader
+; CHECK: LBB1_4
+; CHECK: bne LBB1_2
+; CHECK-NOT: b LBB1_
+; CHECK: ldmia sp!
+  %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
+  %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
+  %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]
+  br i1 %exitcond18, label %bb5, label %bb2.preheader
+
+bb.nph15:                                         ; preds = %entry
+  %i.07 = add i32 %size, -1                       ; <i32> [#uses=2]
+  %4 = icmp sgt i32 %i.07, -1                     ; <i1> [#uses=1]
+  br label %bb2.preheader
+
+bb2.preheader:                                    ; preds = %bb3, %bb.nph15
+  %pass.011 = phi i32 [ 0, %bb.nph15 ], [ %3, %bb3 ] ; <i32> [#uses=1]
+  %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
+  br i1 %4, label %bb1, label %bb3
+
+bb5:                                              ; preds = %bb3, %entry
+  %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
+  ret i32 %sum.1.lcssa
+}
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 24 Sep 2010 19:07:23 +0000 (19:07 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch | blob | history
test/CodeGen/ARM/arm-and-tst-peephole.ll		patch | blob | history
test/CodeGen/ARM/code-placement.ll		patch | blob | history