SDNode *Select(SDNode *N);
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
bool SelectShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
+ bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
}
+// Return true if it is profitable to fold 'Shift' into an addressing mode or
+// shifter operand instead of materializing it separately.
+// Policy (grounded in the checks below): on every subtarget except Cortex-A9
+// folding is always considered profitable; on Cortex-A9 it is profitable only
+// when the shift has a single use (a multi-use shift would have to be
+// recomputed at each folded site) or when it is "lsl #2", which is free.
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ // Non-A9 subtargets: no restriction on folding shifter operands.
+ if (!Subtarget->isCortexA9())
+ return true;
+ // Single-use shift: folding it does not duplicate work.
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ // Do not check isShifterOpProfitable. This must return true.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShReg = CurDAG->getRegister(0, MVT::i32);
ShImmVal = RHS->getZExtValue() & 31;
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
- if (N.getOpcode() == ISD::MUL) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
}
}
+ if (Subtarget->isCortexA9() && !N.hasOneUse())
+ // Compute R +/- (R << N) and reuse it.
+ return false;
+
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(1).getOperand(0);
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
- if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(0).getOperand(0);
- Base = N.getOperand(1);
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
SDValue &Base,
SDValue &Offset,
SDValue &Opc) {
- if (N.getOpcode() == ISD::MUL) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
}
}
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
+
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(1).getOperand(0);
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
- if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(0).getOperand(0);
- Base = N.getOperand(1);
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(0);
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
return false;
}
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R + (R << [1,2,3]) and reuse it.
+ Base = N;
+ return false;
+ }
+
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (ShAmt >= 4) {
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
- } else
- OffReg = OffReg.getOperand(0);
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
- %shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
- %A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
- %B = add i32 %X, %A ; <i32> [#uses=1]
+; The register-controlled shift feeding the add has a single use, so it is
+; folded into the shifter operand on both A8 and A9.
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = shl i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
ret i32 %B
}
define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
- %shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
- %A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- %C = and i32 %X, %B ; <i32> [#uses=1]
+; X & ~(Y >> sh) matches BIC with a register-controlled asr shifter operand;
+; the single-use shift is folded on both A8 and A9.
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+ %shift.upgrd.2 = zext i8 %sh to i32
+ %A = ashr i32 %Y, %shift.upgrd.2
+ %B = xor i32 %A, -1
+ %C = and i32 %X, %B
ret i32 %C
}
+
+; %tmp1 (lsl #2) has two uses, but "lsl #2" is treated as free on A9, so both
+; loads still fold the shift into their addressing mode on A8 and A9 alike.
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r1, [r1, r2, lsl #2]
+; A9: ldr r0, [r0, r2, lsl #2]
+ %tmp1 = shl i32 %offset, 2
+ %tmp2 = add i32 %base, %tmp1
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = add i32 %base2, %tmp1
+ %tmp5 = inttoptr i32 %tmp4 to i32*
+ %tmp6 = load i32* %tmp3
+ %tmp7 = load i32* %tmp5
+ %tmp8 = add i32 %tmp7, %tmp6
+ ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+; The address (base + index, lsl #2) is used by both a load and a store.
+; A8 folds the shift into each access; on A9 the multi-use shifted address is
+; computed once into a register with ADD and reused by the plain ldr/str.
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+ %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+ %1 = bitcast i8* %0 to i32*
+ %2 = sext i16 undef to i32
+ %3 = getelementptr inbounds i32* %1, i32 %2
+ %4 = load i32* %3, align 4
+ %5 = add nsw i32 %4, 1
+ store i32 %5, i32* %3, align 4
+ ret void
+}