From 362fee90b9a1d64ac091755466caf6a94ade22eb Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 17 Jun 2011 20:41:29 +0000 Subject: [PATCH] Lower multiply with overflow checking to __mulo calls if we haven't been able to lower them any other way. Fixes rdar://9090077 and rdar://9210061 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133288 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/RuntimeLibcalls.h | 5 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 63 +++++++++++++++++++ lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + lib/CodeGen/SelectionDAG/TargetLowering.cpp | 11 ++-- test/CodeGen/X86/muloti.ll | 38 +++++++++++ 5 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/X86/muloti.ll diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 576be821774..44d94779784 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -22,7 +22,7 @@ namespace RTLIB { /// RTLIB::Libcall enum - This enum defines all of the runtime library calls /// the backend can emit. The various long double types cannot be merged, /// because 80-bit library functions use "xf" and 128-bit use "tf". - /// + /// /// When adding PPCF128 functions here, note that their names generally need /// to be overridden for Darwin with the xxx$LDBL128 form. See /// PPCISelLowering.cpp. @@ -46,6 +46,9 @@ namespace RTLIB { MUL_I32, MUL_I64, MUL_I128, + MULO_I32, + MULO_I64, + MULO_I128, SDIV_I8, SDIV_I16, SDIV_I32, diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index da75b8afd18..f0aa9d63101 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1072,6 +1073,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -2149,6 +2152,66 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + const Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + std::pair CallInfo = + TLI.LowerCallTo(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + true, Func, Args, DAG, dl); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 06dc40f195d..4597ec93356 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -318,6 +318,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index efbfaa45338..474dd7a9fcd 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; @@ -1914,7 +1917,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // comparisons. if (isa(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - + if (ConstantSDNode *N1C = dyn_cast(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -2673,9 +2676,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - + if (Constraint.length() > 1) return; - + char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; @@ -2865,7 +2868,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } - + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (const StructType *STy = dyn_cast(OpTy)) if (STy->getNumElements() == 1) diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll new file mode 100644 index 00000000000..eb9b6460cdd --- /dev/null +++ b/test/CodeGen/X86/muloti.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +%0 = type { i64, i64 } +%1 = type { i128, i1 } + +@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1 + +define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +entry: + %tmp16 = zext i64 %a.coerce0 to i128 + %tmp11 = zext i64 %a.coerce1 to i128 + %tmp12 = shl nuw i128 %tmp11, 64 + %ins14 = or i128 %tmp12, %tmp16 + %tmp6 = zext i64 %b.coerce0 to i128 + %tmp3 = zext i64 %b.coerce1 to i128 + %tmp4 = shl nuw i128 %tmp3, 64 + %ins = or i128 %tmp4, %tmp6 + %0 = tail call %1 @llvm.smul.with.overflow.i128(i128 %ins14, i128 %ins) +; CHECK: callq ___muloti4 + %1 = extractvalue %1 %0, 0 + %2 = extractvalue %1 %0, 1 + br i1 %2, label %overflow, label %nooverflow + +overflow: ; preds = %entry + tail call void @llvm.trap() + unreachable + +nooverflow: ; preds = %entry + %tmp20 = trunc i128 %1 to i64 + %tmp21 = insertvalue %0 undef, i64 %tmp20, 0 + %tmp22 = lshr i128 %1, 64 + %tmp23 = trunc i128 %tmp22 to i64 + %tmp24 = insertvalue %0 %tmp21, i64 %tmp23, 1 + ret %0 %tmp24 +} + +declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone + +declare void @llvm.trap() nounwind -- 2.34.1