From: Eric Christopher Date: Sat, 18 Jun 2011 00:09:57 +0000 (+0000) Subject: Fix UMULO support for 2x register width to allow the full X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a3071455e540530ecb644a2e59098129f70ce705;p=oota-llvm.git Fix UMULO support for 2x register width to allow the full range without a libcall to a new mulo libcall that we'd have to create. Finishes the rest of rdar://9090077 and rdar://9210061 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133318 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 6797bece13c..cbb8da8f18d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2160,6 +2160,27 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETNE); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); + SDValue Overflow; + Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll index eb9b6460cdd..2f0986e831e 100644 --- a/test/CodeGen/X86/muloti.ll +++ b/test/CodeGen/X86/muloti.ll @@ -2,9 +2,8 @@ %0 = type { i64, i64 } %1 = type { i128, i1 } -@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1 - define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +; CHECK: x entry: %tmp16 = zext i64 %a.coerce0 to i128 %tmp11 = zext i64 %a.coerce1 to i128 @@ -33,6 +32,50 @@ nooverflow: ; preds = %entry ret %0 %tmp24 } +define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +entry: +; CHECK: foo + %retval = alloca i128, align 16 + %coerce = alloca i128, align 16 + %a.addr = alloca i128, align 16 + %coerce1 = alloca i128, align 16 + %b.addr = alloca i128, align 16 + %0 = bitcast i128* %coerce to %0* + %1 = getelementptr %0* %0, i32 0, i32 0 + store i64 %a.coerce0, i64* %1 + %2 = getelementptr %0* %0, i32 0, i32 1 + store i64 %a.coerce1, i64* %2 + %a = load i128* %coerce, align 16 + store i128 %a, i128* %a.addr, align 16 + %3 = bitcast i128* %coerce1 to %0* + %4 = getelementptr %0* %3, i32 0, i32 0 + store i64 %b.coerce0, i64* %4 + %5 = getelementptr %0* %3, i32 0, i32 1 + store i64 %b.coerce1, i64* %5 + %b = load i128* %coerce1, align 16 + store i128 %b, i128* %b.addr, align 16 + %tmp = load i128* %a.addr, align 16 + %tmp2 = load i128* %b.addr, align 16 + %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2) +; CHECK: cmov +; CHECK: divti3 + %7 = extractvalue %1 %6, 0 + %8 = extractvalue %1 %6, 1 + br i1 %8, label %overflow, label %nooverflow + +overflow: ; preds = %entry + call void @llvm.trap() + unreachable + +nooverflow: ; preds = %entry + store i128 %7, i128* %retval + %9 = bitcast i128* %retval to %0* + %10 = load %0* %9, align 1 + ret %0 %10 +} + +declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone + declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone declare void @llvm.trap() nounwind