From 362fee90b9a1d64ac091755466caf6a94ade22eb Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@apple.com>
Date: Fri, 17 Jun 2011 20:41:29 +0000
Subject: [PATCH] Lower multiply with overflow checking to __mulo<mode> calls
 if we haven't been able to lower them any other way.

Fixes rdar://9090077 and rdar://9210061


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133288 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/RuntimeLibcalls.h        |  5 +-
 .../SelectionDAG/LegalizeIntegerTypes.cpp     | 63 +++++++++++++++++++
 lib/CodeGen/SelectionDAG/LegalizeTypes.h      |  1 +
 lib/CodeGen/SelectionDAG/TargetLowering.cpp   | 11 ++--
 test/CodeGen/X86/muloti.ll                    | 38 +++++++++++
 5 files changed, 113 insertions(+), 5 deletions(-)
 create mode 100644 test/CodeGen/X86/muloti.ll
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 576be821774..44d94779784 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -22,7 +22,7 @@ namespace RTLIB {
   /// RTLIB::Libcall enum - This enum defines all of the runtime library calls
   /// the backend can emit.  The various long double types cannot be merged,
   /// because 80-bit library functions use "xf" and 128-bit use "tf".
-  /// 
+  ///
   /// When adding PPCF128 functions here, note that their names generally need
   /// to be overridden for Darwin with the xxx$LDBL128 form.  See
   /// PPCISelLowering.cpp.
@@ -46,6 +46,9 @@ namespace RTLIB {
     MUL_I32,
     MUL_I64,
     MUL_I128,
+    MULO_I32,
+    MULO_I64,
+    MULO_I128,
     SDIV_I8,
     SDIV_I16,
     SDIV_I32,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index da75b8afd18..f0aa9d63101 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1072,6 +1073,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
   case ISD::UADDO:
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+  case ISD::UMULO:
+  case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2149,6 +2152,66 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Ofl);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy();
+  const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand the result by simply replacing it with the equivalent
+  // non-overflow-checking operation.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i32)
+    LC = RTLIB::MULO_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MULO_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MULO_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+  SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+  // Temporary for the overflow value, default it to zero.
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+			       DAG.getConstant(0, PtrVT), Temp,
+			       MachinePointerInfo(), false, false, 0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = N->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = N->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = true;
+    Entry.isZExt = false;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the address of the overflow check.
+  Entry.Node = Temp;
+  Entry.Ty = PtrTy->getPointerTo();
+  Entry.isSExt = true;
+  Entry.isZExt = false;
+  Args.push_back(Entry);
+
+  SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+		    0, TLI.getLibcallCallingConv(LC), false,
+		    true, Func, Args, DAG, dl);
+
+  SplitInteger(CallInfo.first, Lo, Hi);
+  SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+			      MachinePointerInfo(), false, false, 0);
+  SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+                             DAG.getConstant(0, PtrVT),
+                             ISD::SETNE);
+  // Use the overflow from the libcall everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 06dc40f195d..4597ec93356 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -318,6 +318,7 @@ private:
 
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, unsigned Amt,
                              SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index efbfaa45338..474dd7a9fcd 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::MUL_I32] = "__mulsi3";
   Names[RTLIB::MUL_I64] = "__muldi3";
   Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::MULO_I32] = "__mulosi4";
+  Names[RTLIB::MULO_I64] = "__mulodi4";
+  Names[RTLIB::MULO_I128] = "__muloti4";
   Names[RTLIB::SDIV_I8] = "__divqi3";
   Names[RTLIB::SDIV_I16] = "__divhi3";
   Names[RTLIB::SDIV_I32] = "__divsi3";
@@ -1914,7 +1917,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   // comparisons.
   if (isa<ConstantSDNode>(N0.getNode()))
     return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-  
+
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
 
@@ -2673,9 +2676,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                   std::string &Constraint,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
-  
+
   if (Constraint.length() > 1) return;
-  
+
   char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
@@ -2865,7 +2868,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           report_fatal_error("Indirect operand for inline asm not a pointer!");
         OpTy = PtrTy->getElementType();
       }
-      
+
       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
       if (const StructType *STy = dyn_cast<StructType>(OpTy))
         if (STy->getNumElements() == 1)
diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll
new file mode 100644
index 00000000000..eb9b6460cdd
--- /dev/null
+++ b/test/CodeGen/X86/muloti.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+%0 = type { i64, i64 }
+%1 = type { i128, i1 }
+
+@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1
+
+define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+entry:
+  %tmp16 = zext i64 %a.coerce0 to i128
+  %tmp11 = zext i64 %a.coerce1 to i128
+  %tmp12 = shl nuw i128 %tmp11, 64
+  %ins14 = or i128 %tmp12, %tmp16
+  %tmp6 = zext i64 %b.coerce0 to i128
+  %tmp3 = zext i64 %b.coerce1 to i128
+  %tmp4 = shl nuw i128 %tmp3, 64
+  %ins = or i128 %tmp4, %tmp6
+  %0 = tail call %1 @llvm.smul.with.overflow.i128(i128 %ins14, i128 %ins)
+; CHECK: callq   ___muloti4
+  %1 = extractvalue %1 %0, 0
+  %2 = extractvalue %1 %0, 1
+  br i1 %2, label %overflow, label %nooverflow
+
+overflow:                                         ; preds = %entry
+  tail call void @llvm.trap()
+  unreachable
+
+nooverflow:                                       ; preds = %entry
+  %tmp20 = trunc i128 %1 to i64
+  %tmp21 = insertvalue %0 undef, i64 %tmp20, 0
+  %tmp22 = lshr i128 %1, 64
+  %tmp23 = trunc i128 %tmp22 to i64
+  %tmp24 = insertvalue %0 %tmp21, i64 %tmp23, 1
+  ret %0 %tmp24
+}
+
+declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
+
+declare void @llvm.trap() nounwind
-- 
2.34.1