Don't break the IV update in TLI::SimplifySetCC().

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index eefb9e84b17c779c4c3cf7a0f836ac90c85ceff8..03aed3aeca6a7fe24db8dd1b453ae9b0856bce7b 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2471,6 +2471,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
          }
        }
  
+      // If RHS is a legal immediate value for a compare instruction, we need
+      // to be careful about increasing register pressure needlessly.
+      bool LegalRHSImm = false;
+
        if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
          if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
            // Turn (X+C1) == C2 --> X == C2-C1
@@ -2505,25 +2509,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                             Cond);
            }
          }
+
+        // Could RHSC fold directly into a compare?
+        if (RHSC->getValueType(0).getSizeInBits() <= 64)
+          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
        }
  
        // Simplify (X+Z) == X -->  Z == 0
-      if (N0.getOperand(0) == N1)
-        return DAG.getSetCC(dl, VT, N0.getOperand(1),
-                        DAG.getConstant(0, N0.getValueType()), Cond);
-      if (N0.getOperand(1) == N1) {
-        if (DAG.isCommutativeBinOp(N0.getOpcode()))
-          return DAG.getSetCC(dl, VT, N0.getOperand(0),
-                          DAG.getConstant(0, N0.getValueType()), Cond);
-        else if (N0.getNode()->hasOneUse()) {
-          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
-          // (Z-X) == X  --> Z == X<<1
-          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
-                                     N1,
+      // Don't do this if X is an immediate that can fold into a cmp
+      // instruction and X+Z has other uses. It could be an induction variable
+      // chain, and the transform would increase register pressure.
+      if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+        if (N0.getOperand(0) == N1)
+          return DAG.getSetCC(dl, VT, N0.getOperand(1),
+                              DAG.getConstant(0, N0.getValueType()), Cond);
+        if (N0.getOperand(1) == N1) {
+          if (DAG.isCommutativeBinOp(N0.getOpcode()))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                                DAG.getConstant(0, N0.getValueType()), Cond);
+          else if (N0.getNode()->hasOneUse()) {
+            assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+            // (Z-X) == X  --> Z == X<<1
+            SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
                         DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
-          if (!DCI.isCalledByLegalizer())
-            DCI.AddToWorklist(SH.getNode());
-          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+            if (!DCI.isCalledByLegalizer())
+              DCI.AddToWorklist(SH.getNode());
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+          }
          }
        }
      }
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll

index 9ff114e2b6f21a3fe991f669bb8de943dde0a3bb..9aaa821698c1629ea271f081cf77d46e058f2099 100644 (file)
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
  
  ; This now reduces to a single induction variable.
  
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
  @G = external global i32                          ; <i32*> [#uses=2]
  @array = external global i32*                     ; <i32**> [#uses=1]
  
@@ -20,9 +15,9 @@ entry:
  
  bb:                                               ; preds = %bb, %entry
  ; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
  
    %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
    %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll

index bdf09dff0b0d7a97c367d4854b58ae5f85026cef..ebda9f201df968eaa67d18050c18e99da156d55a 100644 (file)
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,5 +1,6 @@
  ; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
  
+; CHECK: t:
  ; CHECK: decq
  ; CHECK-NEXT: movl (
  ; CHECK-NEXT: jne
@@ -136,3 +137,44 @@ bb2:               ; preds = %bb
         store i8 %92, i8* %93, align 1
         ret void
  }
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp eq i32 %i, 1
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %i to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+  %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, %b.05
+  %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+  %2 = trunc i64 %indvars.iv to i32
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
+
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Thu, 5 Apr 2012 20:30:20 +0000 (20:30 +0000)
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch | blob | history
test/CodeGen/Thumb2/lsr-deficiency.ll		patch | blob | history
test/CodeGen/X86/lsr-loop-exit-cond.ll		patch | blob | history