From e04360918b5d2116ff0e79a0f5f080428563a716 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 11 Apr 2014 16:12:01 +0000 Subject: [PATCH] SelectionDAG: Use helper function to improve legalization of ISD::MUL The TargetLowering::expandMUL() helper contains lowering code extracted from the DAGTypeLegalizer and allows the SelectionDAGLegalizer to expand more ISD::MUL patterns without having to use a library call. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206037 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 +++++++ test/CodeGen/R600/mul.ll | 63 +++++++++++++++++------- test/CodeGen/R600/mul_uint24-i64.ll | 24 --------- test/CodeGen/R600/mul_uint24.ll | 18 +++++++ 4 files changed, 80 insertions(+), 42 deletions(-) delete mode 100644 test/CodeGen/R600/mul_uint24-i64.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 20afb3d5942..230747c76bb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3625,6 +3625,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1))); break; } + + SDValue Lo, Hi; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT) && + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + TLI.getShiftAmountTy(HalfType)); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll index e1761485835..6ed754c5aa7 100644 --- a/test/CodeGen/R600/mul.ll +++ b/test/CodeGen/R600/mul.ll @@ -1,15 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; mul24 and mad24 are affected -;EG-CHECK: @test2 -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test2 +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test2 -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -20,17 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;EG-CHECK: @test4 -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test4 +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test4 -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -52,3 +50,32 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { store i32 %trunc, i32 addrspace(1)* %out, align 8 ret void } + +; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top +; 32-bits of both arguments are sign bits. +; FUNC-LABEL: @mul64_sext_c +; EG-DAG: MULLO_INT +; EG-DAG: MULHI_INT +; SI-DAG: V_MUL_LO_I32 +; SI-DAG: V_MUL_HI_I32 +define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) { +entry: + %0 = sext i32 %in to i64 + %1 = mul i64 %0, 80 + store i64 %1, i64 addrspace(1)* %out + ret void +} + +; A standard 64-bit multiply. The expansion should be around 6 instructions. +; It would be difficult to match the expansion correctly without writing +; a really complicated list of FileCheck expressions. I don't want +; to confuse people who may 'break' this test with a correct optimization, +; so this test just uses FUNC-LABEL to make sure the compiler does not +; crash with a 'failed to select' error. +; FUNC-LABEL: @mul64 +define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = mul i64 %a, %b + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mul_uint24-i64.ll b/test/CodeGen/R600/mul_uint24-i64.ll deleted file mode 100644 index 95b3bcbf8e8..00000000000 --- a/test/CodeGen/R600/mul_uint24-i64.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC - -; FIXME: Move this test into mul_uint24.ll once i64 mul is supported. -; XFAIL: * - -; Multiply with 24-bit inputs and 64-bit output -; FUNC_LABEL: @mul24_i64 -; EG; MUL_UINT24 -; EG: MULHI -; SI: V_MUL_U32_U24 -; FIXME: SI support 24-bit mulhi -; SI: V_MUL_HI_U32 -define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { -entry: - %0 = shl i64 %a, 40 - %a_24 = lshr i64 %0, 40 - %1 = shl i64 %b, 40 - %b_24 = lshr i64 %1, 40 - %2 = mul i64 %a_24, %b_24 - store i64 %2, i64 addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll index 27b3717f6b7..419f2751b83 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/R600/mul_uint24.ll @@ -46,3 +46,21 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + +; Multiply with 24-bit inputs and 64-bit output +; FUNC_LABEL: @mul24_i64 +; EG; MUL_UINT24 +; EG: MULHI +; SI: V_MUL_U32_U24 +; FIXME: SI support 24-bit mulhi +; SI: V_MUL_HI_U32 +define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = shl i64 %a, 40 + %a_24 = lshr i64 %0, 40 + %1 = shl i64 %b, 40 + %b_24 = lshr i64 %1, 40 + %2 = mul i64 %a_24, %b_24 + store i64 %2, i64 addrspace(1)* %out + ret void +} -- 2.34.1