From 5689f67ef75ea0b927e0bb3c3cda9078e9bf100f Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Thu, 13 Aug 2015 01:32:30 +0000 Subject: [PATCH] [CodeGen] Mark the promoted FCOPYSIGN result FP_ROUND as TRUNCating. Now that we can properly promote mismatched FCOPYSIGNs (r244858), we can mark the FP_ROUND on the result as truncating, to expose folding. FCOPYSIGN doesn't change anything but the sign bit, so (fp_round (fcopysign (fpext a), b)) is equivalent to (modulo the sign bit): (fp_round (fpext a)) which is a no-op. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244862 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 9 ++++++++- test/CodeGen/AArch64/f16-instructions.ll | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3b15752e2e4..552f92bbd46 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4301,8 +4301,15 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. + const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll index 99dc8e33014..3dd54e64623 100644 --- a/test/CodeGen/AArch64/f16-instructions.ll +++ b/test/CodeGen/AArch64/f16-instructions.ll @@ -702,6 +702,21 @@ define half @test_copysign_f64(half %a, double %b) #0 { ret half %r } +; Check that the FP promotion will use a truncating FP_ROUND, so we can fold +; away the (fpext (fp_round )) here. + +; CHECK-LABEL: test_copysign_extended: +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret +define float @test_copysign_extended(half %a, half %b) #0 { + %r = call half @llvm.copysign.f16(half %a, half %b) + %xr = fpext half %r to float + ret float %xr +} + ; CHECK-LABEL: test_floor: ; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 ; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] -- 2.34.1