From 187ac426865c0f3629fcc22ee9cd473d90cea121 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Mon, 13 Apr 2015 15:32:01 +0000 Subject: [PATCH] LegalizeDAG: Try to use Overflow operations when expanding ADD/SUB v2: consider BooleanContents when processing overflow Signed-off-by: Jan Vesely Reviewers: resistor, jholewinsky (nvidia parts) Differential Revision: http://reviews.llvm.org/D6340 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234755 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 33 +++++++++++++++++++ test/CodeGen/Hexagon/adde.ll | 11 ++----- test/CodeGen/Hexagon/sube.ll | 9 ++--- test/CodeGen/NVPTX/add-128bit.ll | 4 +-- test/CodeGen/R600/sub.ll | 10 +++--- 5 files changed, 43 insertions(+), 24 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 25e80b9c736..e4a796e5478 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1629,6 +1629,39 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, return; } + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? + ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + default: + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll index 5a8345c477c..169899d7126 100644 --- a/test/CodeGen/Hexagon/adde.ll +++ b/test/CodeGen/Hexagon/adde.ll @@ -1,16 +1,9 @@ ; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s -; CHECK: r{{[0-9]+:[0-9]+}} = #0 -; CHECK: r{{[0-9]+:[0-9]+}} = #1 +; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) ; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) -; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, #1, #0) ; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll index 1a7882276c8..b6d77a9df5f 100644 --- a/test/CodeGen/Hexagon/sube.ll +++ b/test/CodeGen/Hexagon/sube.ll @@ -1,13 +1,10 @@ ; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s -; CHECK: r{{[0-9]+:[0-9]+}} = #0 -; CHECK: r{{[0-9]+:[0-9]+}} = #1 -; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) -; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) ; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) -; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}}) +; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) +; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, #1, #0) +; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}}) define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { entry: diff --git a/test/CodeGen/NVPTX/add-128bit.ll b/test/CodeGen/NVPTX/add-128bit.ll index 29e3cdffae7..c1144def8c2 100644 --- a/test/CodeGen/NVPTX/add-128bit.ll +++ b/test/CodeGen/NVPTX/add-128bit.ll @@ -7,10 +7,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @foo(i64 %a, i64 %add, i128* %retptr) { ; CHECK: add.s64 ; CHECK: setp.lt.u64 -; CHECK: setp.lt.u64 -; CHECK: selp.b64 ; CHECK: selp.b64 -; CHECK: add.s64 +; CHECK: sub.s64 %t1 = sext i64 %a to i128 %add2 = zext i64 %add to i128 %val = add i128 %t1, %add2 diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 03303f595de..7216a312ebc 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -58,11 +58,10 @@ define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1) ; SI: s_sub_u32 ; SI: s_subb_u32 -; EG-DAG: SETGE_UINT -; EG-DAG: CNDE_INT -; EG-DAG: SUB_INT ; EG-DAG: SUB_INT +; EG-DAG: SETGT_UINT ; EG-DAG: SUB_INT +; EG-DAG: ADD_INT define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b store i64 %result, i64 addrspace(1)* %out, align 8 @@ -73,11 +72,10 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 -; EG-DAG: SETGE_UINT -; EG-DAG: CNDE_INT -; EG-DAG: SUB_INT ; EG-DAG: SUB_INT +; EG-DAG: SETGT_UINT ; EG-DAG: SUB_INT +; EG-DAG: ADD_INT define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() readnone %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid -- 2.34.1