From 3583d2301879d50e3ed018f4dc48a03b7bbbb205 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 20 Feb 2015 19:37:14 +0000 Subject: [PATCH] [X86][FastIsel] Teach how to select float-half conversion intrinsics. This patch teaches X86FastISel how to select intrinsic 'convert_from_fp16' and intrinsic 'convert_to_fp16'. If the target has F16C, we can select VCVTPS2PHrr for a float-half conversion, and VCVTPH2PSrr for a half-float conversion. Differential Revision: http://reviews.llvm.org/D7673 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 62 +++++++++++++++++++ .../X86/fast-isel-double-half-convertion.ll | 23 +++++++ .../X86/fast-isel-float-half-convertion.ll | 28 +++++++++ 3 files changed, 113 insertions(+) create mode 100644 test/CodeGen/X86/fast-isel-double-half-convertion.ll create mode 100644 test/CodeGen/X86/fast-isel-float-half-convertion.ll diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f63c395fae9..da53a795725 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2182,6 +2182,68 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // FIXME: Handle more intrinsics. switch (II->getIntrinsicID()) { default: return false; + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: { + if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) + return false; + + const Value *Op = II->getArgOperand(0); + unsigned InputReg = getRegForValue(Op); + if (InputReg == 0) + return false; + + // F16C only allows converting from float to half and from half to float. 
+ bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16; + if (IsFloatToHalf) { + if (!Op->getType()->isFloatTy()) + return false; + } else { + if (!II->getType()->isFloatTy()) + return false; + } + + unsigned ResultReg = 0; + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16); + if (IsFloatToHalf) { + // 'InputReg' is implicitly promoted from register class FR32 to + // register class VR128 by method 'constrainOperandRegClass' which is + // directly called by 'fastEmitInst_ri'. + // Instruction VCVTPS2PHrr takes an extra immediate operand which is + // used to provide rounding control. + InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0); + + // Move the lower 32-bits of InputReg to a new register of class GR32. + ResultReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(X86::VMOVPDI2DIrr), ResultReg) + .addReg(InputReg, RegState::Kill); + + // The result value is in the lower 16-bits of ResultReg. + unsigned RegIdx = X86::sub_16bit; + ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx); + } else { + assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!"); + // Explicitly sign-extend the input to 32-bit. + InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg, + /*Kill=*/false); + + // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr. + InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR, + InputReg, /*Kill=*/true); + + InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true); + + // The result value is in the lower 32-bits of InputReg. + // Emit an explicit copy from register class VR128 to register class FR32.
+ ResultReg = createResultReg(&X86::FR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(InputReg, RegState::Kill); + } + + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::frameaddress: { MachineFunction *MF = FuncInfo.MF; if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI()) diff --git a/test/CodeGen/X86/fast-isel-double-half-convertion.ll b/test/CodeGen/X86/fast-isel-double-half-convertion.ll new file mode 100644 index 00000000000..ade867b7d70 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-double-half-convertion.ll @@ -0,0 +1,23 @@ +; RUN: llc -fast-isel -fast-isel-abort -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s + +; XFAIL: * + +; In the future, we might want to teach fast-isel how to expand a double-to-half +; conversion into a double-to-float conversion immediately followed by a +; float-to-half conversion. For now, fast-isel is expected to fail. + +define double @test_fp16_to_fp64(i32 %a) { +entry: + %0 = trunc i32 %a to i16 + %1 = call double @llvm.convert.from.fp16.f64(i16 %0) + ret double %1 +} + +define i16 @test_fp64_to_fp16(double %a) { +entry: + %0 = call i16 @llvm.convert.to.fp16.f64(double %a) + ret i16 %0 +} + +declare i16 @llvm.convert.to.fp16.f64(double) +declare double @llvm.convert.from.fp16.f64(i16) diff --git a/test/CodeGen/X86/fast-isel-float-half-convertion.ll b/test/CodeGen/X86/fast-isel-float-half-convertion.ll new file mode 100644 index 00000000000..ee89bcd2a8b --- /dev/null +++ b/test/CodeGen/X86/fast-isel-float-half-convertion.ll @@ -0,0 +1,28 @@ +; RUN: llc -fast-isel -fast-isel-abort -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s + +; Verify that fast-isel correctly expands float-half conversions.
+ +define i16 @test_fp32_to_fp16(float %a) { +; CHECK-LABEL: test_fp32_to_fp16: +; CHECK: vcvtps2ph $0, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %xmm0, %eax +; CHECK-NEXT: retq +entry: + %0 = call i16 @llvm.convert.to.fp16.f32(float %a) + ret i16 %0 +} + +define float @test_fp16_to_fp32(i32 %a) { +; CHECK-LABEL: test_fp16_to_fp32: +; CHECK: movswl %di, %eax +; CHECK-NEXT: vmovd %eax, %xmm0 +; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %0 = trunc i32 %a to i16 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + ret float %1 +} + +declare i16 @llvm.convert.to.fp16.f32(float) +declare float @llvm.convert.from.fp16.f32(i16) -- 2.34.1