From 9f5baa3c7e01e632722a0d7c37b3aa82591bc8a8 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 11 Feb 2014 23:49:31 +0000 Subject: [PATCH] =?utf8?q?Tweak=20ARM=20fastcc=20by=20adopting=20these=20t?= =?utf8?q?wo=20AAPCS=20rules:=20*=20CPRCs=20may=20be=20allocated=20to=20co?= =?utf8?q?-processor=20registers=20or=20the=20stack=20=E2=80=93=20they=20m?= =?utf8?q?ay=20never=20be=20allocated=20to=20core=20registers=20*=20When?= =?utf8?q?=20a=20CPRC=20is=20allocated=20to=20the=20stack,=20all=20other?= =?utf8?q?=20VFP=20registers=20should=20be=20marked=20as=20unavailable?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The difference is only noticeable in rare cases where there are a large number of floating point arguments (e.g. 7 doubles + additional float, double arguments). Although it's probably still better to avoid vmov as it can cause stalls in some older ARM cores. The other, more subtle benefit, is to minimize difference between the various calling conventions. rdar://16039676 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201193 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCallingConv.td | 7 ++++++ test/CodeGen/ARM/fastcc-vfp.ll | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 test/CodeGen/ARM/fastcc-vfp.ll diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 9ca2e465d85..b1c1f3eab19 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -64,6 +64,13 @@ def FastCC_ARM_APCS : CallingConv<[ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, + + // CPRCs may be allocated to co-processor registers or the stack – they + // may never be allocated to core registers. 
+  CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>,
+  CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>,
+
   CCDelegateTo<CC_ARM_APCS>
 ]>;
diff --git a/test/CodeGen/ARM/fastcc-vfp.ll b/test/CodeGen/ARM/fastcc-vfp.ll
new file mode 100644
index 00000000000..4c98150c708
--- /dev/null
+++ b/test/CodeGen/ARM/fastcc-vfp.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mattr=+vfp2 | FileCheck %s
+
+define fastcc double @t1(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) {
+entry:
+; CHECK-LABEL: t1:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd float %a, %b
+  %conv = fpext float %add to double
+  ret double %conv
+}
+
+define fastcc double @t2(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) {
+entry:
+; CHECK-LABEL: t2:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd double %a, %c
+  ret double %add
+}
+
+define fastcc float @t3(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: vldr
+  %add = fadd float %a, %c
+  ret float %add
+}
+
+define fastcc double @t4(double %a, double %b) #0 {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: vstr
+  %add = fadd double %a, %b
+  %sub = fsub double %a, %b
+  %call = tail call fastcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub) #2
+  ret double %call
+}
+
+declare fastcc double @x(double, double, double, double, double, double, double, float, double)
-- 
2.34.1