Optimize some 64-bit multiplication by constants into two lea's or one lea + shl...

author Evan Cheng <evan.cheng@apple.com>

Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)
author Evan Cheng <evan.cheng@apple.com>
Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index d0721d3985e43aa78d76fcaae6b133447b4d5801..587666f04a3f86dcf015d1ce17965fe6f78d5588 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -791,9 +791,10 @@ public:
      bool isCalledByLegalizer() const { return CalledByLegalizer; }
      
      void AddToWorklist(SDNode *N);
-    SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To);
-    SDValue CombineTo(SDNode *N, SDValue Res);
-    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1);
+    SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
+                      bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
  
      void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
    };
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 041c5007c53d373e6b92bfe18954b62cf65e3fee..78d5d403e8625ddc5c1d9477b0a4ee4a890fc9e5 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -93,14 +93,14 @@ namespace {
      }
  
      SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
-                        bool AddTo = true);
+                      bool AddTo = true);
  
      SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
        return CombineTo(N, &Res, 1, AddTo);
      }
  
      SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
-                        bool AddTo = true) {
+                      bool AddTo = true) {
        SDValue To[] = { Res0, Res1 };
        return CombineTo(N, To, 2, AddTo);
      }
@@ -293,19 +293,19 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  }
  
  SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To) {
-  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size());
+CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
  }
  
  SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, SDValue Res) {
-  return ((DAGCombiner*)DC)->CombineTo(N, Res);
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
  }
  
  
  SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, SDValue Res0, SDValue Res1) {
-  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
  }
  
  void TargetLowering::DAGCombinerInfo::
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 48560590a5c4e90e26b499c6de084c92870a51e2..29174b9a6a3b33142d3737cac8714dbf432dd594 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -826,6 +826,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::STORE);
+  if (Subtarget->is64Bit())
+    setTargetDAGCombine(ISD::MUL);
  
    computeRegisterProperties();
  
@@ -8407,6 +8409,74 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
  }
  
  
+/// PerformMulCombine - Split an i64 multiply by a constant of the form
+/// {3,5,9} * {2^k,3,5,9} into two cheaper operations (a multiply by 3, 5 or
+/// 9 and/or a shift) so it can be selected as LEA + SHL or LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DAG.getMachineFunction().
+      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    return SDValue();
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  MVT VT = N->getValueType(0);
+  if (VT != MVT::i64)
+    return SDValue();
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return SDValue();
+  uint64_t MulAmt = C->getZExtValue();
+  if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+    return SDValue();
+
+  uint64_t MulAmt1 = 0;
+  uint64_t MulAmt2 = 0;
+  if ((MulAmt % 9) == 0) {
+    MulAmt1 = 9;
+    MulAmt2 = MulAmt / 9;
+  } else if ((MulAmt % 5) == 0) {
+    MulAmt1 = 5;
+    MulAmt2 = MulAmt / 5;
+  } else if ((MulAmt % 3) == 0) {
+    MulAmt1 = 3;
+    MulAmt2 = MulAmt / 3;
+  }
+  if (MulAmt2 &&
+      (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+    DebugLoc DL = N->getDebugLoc();
+
+    if (isPowerOf2_64(MulAmt2) &&
+        !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+      // If the second multiplier is a power of 2, issue it first. We want the
+      // multiply by 3, 5, or 9 to be folded into the addressing mode unless
+      // the lone use is an add.
+      std::swap(MulAmt1, MulAmt2);
+
+    SDValue NewMul;
+    if (isPowerOf2_64(MulAmt1)) 
+      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                           DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+    else
+      NewMul = DAG.getNode(ISD::MUL, DL, VT, N->getOperand(0),
+                           DAG.getConstant(MulAmt1, VT));
+
+    if (isPowerOf2_64(MulAmt2)) 
+      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+                           DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+    else 
+      NewMul = DAG.getNode(ISD::MUL, DL, VT, NewMul,
+                           DAG.getConstant(MulAmt2, VT));
+
+    // Replace N; AddTo=false keeps the new nodes off the combiner worklist.
+    DCI.CombineTo(N, NewMul, false);
+  }
+  return SDValue();
+}
+
+
  /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
  ///                       when possible.
  static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
@@ -8668,6 +8738,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
      return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
    case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
    case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll

new file mode 100644 (file)

index 0000000..0a2df1c
--- /dev/null
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | count 3
+; RUN: llvm-as < %s | llc -march=x86-64 | grep shl | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep imul
+
+define i64 @t1(i64 %a) nounwind readnone {
+entry:
+       %0 = mul i64 %a, 81             ; <i64> [#uses=1]
+       ret i64 %0
+}
+
+define i64 @t2(i64 %a) nounwind readnone {
+entry:
+       %0 = mul i64 %a, 40             ; <i64> [#uses=1]
+       ret i64 %0
+}
author	Evan Cheng <evan.cheng@apple.com>
	Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Sat, 28 Mar 2009 05:57:29 +0000 (05:57 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/imul-lea-2.ll	[new file with mode: 0644]	patch \| blob