From: Tim Northover Date: Sun, 15 Jun 2014 09:27:15 +0000 (+0000) Subject: AArch64: improve handling & modelling of FP_TO_XINT nodes. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8bfc50e4a976476425e8f2ba7e71d7d14b48b474;p=oota-llvm.git AArch64: improve handling & modelling of FP_TO_XINT nodes. There's probably no acatual change in behaviour here, just updating the LowerFP_TO_INT function to be more similar to the reverse implementation and updating costs to current CodeGen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210985 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index fd9e69d3231..c2cf5b7f17f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1387,24 +1387,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); - // FP_TO_XINT conversion from the same type are legal. - if (VT.getSizeInBits() == InVT.getSizeInBits()) - return Op; - - if (InVT == MVT::v2f64 || InVT == MVT::v4f32) { + if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); - } else if (InVT == MVT::v2f32) { + } + + if (VT.getSizeInBits() > InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } // Type changing conversions are illegal. - return SDValue(); + return Op; } SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 4fae0a53b32..1dac14b96af 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -315,8 +315,10 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, // Complex: to v2f32 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, // Complex: to v4f32 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, @@ -341,12 +343,27 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, - { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, + + // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, + + // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; int Idx = ConvertCostTableLookup( diff --git a/test/CodeGen/AArch64/complex-fp-to-int.ll b/test/CodeGen/AArch64/complex-fp-to-int.ll index 1ea47ade81e..13cf762c3d2 100644 --- a/test/CodeGen/AArch64/complex-fp-to-int.ll +++ b/test/CodeGen/AArch64/complex-fp-to-int.ll @@ -1,24 +1,141 @@ ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s -; CHECK: fptosi_1 -; CHECK: fcvtzs.2d -; CHECK: xtn.2s -; CHECK: ret -define void @fptosi_1(<2 x double> %in, <2 x i32>* %addr) nounwind noinline ssp { -entry: - %0 = fptosi <2 x double> %in to <2 x i32> - store <2 x i32> %0, <2 x i32>* %addr, align 8 - ret void -} - -; CHECK: fptoui_1 -; CHECK: fcvtzu.2d -; CHECK: xtn.2s -; CHECK: ret -define void @fptoui_1(<2 x double> %in, <2 x i32>* %addr) nounwind noinline ssp { -entry: - %0 = fptoui <2 x double> %in to <2 x i32> - store <2 x i32> %0, <2 x i32>* %addr, align 8 - ret void +define <2 x i64> @test_v2f32_to_signed_v2i64(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i64: +; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s +; CHECK: fcvtzs.2d v0, [[VAL64]] + + %val = fptosi <2 x float> %in to <2 x i64> + ret <2 x i64> %val +} + +define <2 x i64> @test_v2f32_to_unsigned_v2i64(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i64: +; CHECK: fcvtl [[VAL64:v[0-9]+]].2d, v0.2s +; CHECK: fcvtzu.2d v0, [[VAL64]] + + %val = fptoui <2 x float> %in to <2 x i64> + ret <2 x i64> %val +} + +define <2 x i16> @test_v2f32_to_signed_v2i16(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i16: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptosi <2 x float> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i16> @test_v2f32_to_unsigned_v2i16(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i16: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptoui <2 x float> %in to <2 x i16> + ret <2 x i16> %val } +define <2 x i8> @test_v2f32_to_signed_v2i8(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_signed_v2i8: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptosi <2 x float> %in to <2 x i8> + ret <2 x i8> %val +} + +define <2 x i8> @test_v2f32_to_unsigned_v2i8(<2 x float> %in) { +; CHECK-LABEL: test_v2f32_to_unsigned_v2i8: +; CHECK: fcvtzs.2s v0, v0 + + %val = fptoui <2 x float> %in to <2 x i8> + ret <2 x i8> %val +} + +define <4 x i16> @test_v4f32_to_signed_v4i16(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_signed_v4i16: +; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptosi <4 x float> %in to <4 x i16> + ret <4 x i16> %val +} + +define <4 x i16> @test_v4f32_to_unsigned_v4i16(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_unsigned_v4i16: +; CHECK: fcvtzu.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptoui <4 x float> %in to <4 x i16> + ret <4 x i16> %val +} + +define <4 x i8> @test_v4f32_to_signed_v4i8(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_signed_v4i8: +; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptosi <4 x float> %in to <4 x i8> + ret <4 x i8> %val +} + +define <4 x i8> @test_v4f32_to_unsigned_v4i8(<4 x float> %in) { +; CHECK-LABEL: test_v4f32_to_unsigned_v4i8: +; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.4h v0, [[VAL64]] + + %val = fptoui <4 x float> %in to <4 x i8> + ret <4 x i8> %val +} + +define <2 x i32> @test_v2f64_to_signed_v2i32(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_signed_v2i32: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i32> + ret <2 x i32> %val +} + +define <2 x i32> @test_v2f64_to_unsigned_v2i32(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i32: +; CHECK: fcvtzu.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i32> + ret <2 x i32> %val +} + +define <2 x i16> @test_v2f64_to_signed_v2i16(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_signed_v2i16: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i16> @test_v2f64_to_unsigned_v2i16(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i16: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i16> + ret <2 x i16> %val +} + +define <2 x i8> @test_v2f64_to_signed_v2i8(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_signed_v2i8: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptosi <2 x double> %in to <2 x i8> + ret <2 x i8> %val +} + +define <2 x i8> @test_v2f64_to_unsigned_v2i8(<2 x double> %in) { +; CHECK-LABEL: test_v2f64_to_unsigned_v2i8: +; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 +; CHECK: xtn.2s v0, [[VAL64]] + + %val = fptoui <2 x double> %in to <2 x i8> + ret <2 x i8> %val +}