From: Tim Northover
Date: Wed, 28 Oct 2015 22:46:43 +0000 (+0000)
Subject: ARM: add backend support for the ABI used in WatchOS
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=26541ec6e9935ae6cef75a5632c8513aa220af06;p=oota-llvm.git

ARM: add backend support for the ABI used in WatchOS

At the LLVM level this ABI is essentially a minimal modification of AAPCS
to support 16-byte alignment for vector types and the stack.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251570 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index d687568d7eb..3d216c0ed04 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -199,7 +199,9 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
 
   // Try to allocate a contiguous block of registers, each of the correct
   // size to hold one member.
-  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+  auto &DL = State.getMachineFunction().getDataLayout();
+  unsigned StackAlign = DL.getStackAlignment();
+  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
 
   ArrayRef<MCPhysReg> RegList;
   switch (LocVT.SimpleTy) {
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 27cf06b995a..23351641514 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
   CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
   CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+  CCIfType<[v2f64], CCIfAlign<"16",
+           CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
   CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
 ]>;
 
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 28e866e8cb3..966f0310f0b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
   // iOS requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetIOS())
+  if (STI.isTargetIOS() || STI.isTargetWatchOS())
     return true;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1073,7 +1074,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
   // slot offsets can be wrong. The offset for d8 will always be correct.
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned DNum = CSI[i].getReg() - ARM::D8;
-    if (DNum >= 8)
+    if (DNum > NumAlignedDPRCS2Regs - 1)
       continue;
     int FI = CSI[i].getFrameIdx();
     // The even-numbered registers will be 16-byte aligned, the odd-numbered
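
The ARMCallingConv.h change above replaces the hard-coded 8-byte cap on an
aggregate member's alignment with the subtarget's stack alignment. A minimal
standalone C++ sketch (an illustration, not part of the patch) of how that
clamp behaves under the two ABIs:

    // Standalone sketch: the member alignment recorded in ExtraInfo is capped
    // at the stack alignment, so a 16-byte-aligned HFA member keeps its full
    // alignment only when the stack itself is 16-byte aligned (AAPCS16).
    #include <algorithm>
    #include <cassert>

    unsigned clampMemberAlign(unsigned MemberAlign, unsigned StackAlign) {
      return std::min(MemberAlign, StackAlign);
    }

    int main() {
      assert(clampMemberAlign(16, 8) == 8);   // classic AAPCS: old hard-coded 8U
      assert(clampMemberAlign(16, 16) == 16); // AAPCS16 (watchOS): 16-byte stack
      assert(clampMemberAlign(8, 16) == 8);   // smaller members are unaffected
    }
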
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e0d9939d230..aea452cdbc5 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -155,11 +155,18 @@ void ARMSubtarget::initializeEnvironment() {
 
 void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if (CPUString.empty()) {
-    if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
-      // Default to the Swift CPU when targeting armv7s/thumbv7s.
-      CPUString = "swift";
-    else
-      CPUString = "generic";
+    CPUString = "generic";
+
+    if (isTargetDarwin()) {
+      StringRef ArchName = TargetTriple.getArchName();
+      if (ArchName.endswith("v7s"))
+        // Default to the Swift CPU when targeting armv7s/thumbv7s.
+        CPUString = "swift";
+      else if (ArchName.endswith("v7k"))
+        // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
+        // ARMv7k does not use SjLj exception handling.
+        CPUString = "cortex-a7";
+    }
   }
 
   // Insert the architecture feature derived from the target triple into the
@@ -190,7 +197,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if (isAAPCS_ABI())
     stackAlignment = 8;
 
-  if (isTargetNaCl())
+  if (isTargetNaCl() || isAAPCS16_ABI())
     stackAlignment = 16;
 
   // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
@@ -241,8 +248,14 @@ bool ARMSubtarget::isAPCS_ABI() const {
 }
 bool ARMSubtarget::isAAPCS_ABI() const {
   assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
-  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+         TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
 }
 
+bool ARMSubtarget::isAAPCS16_ABI() const {
+  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+
 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
 bool
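
As the ARMSubtarget.cpp hunks show, AAPCS16 is layered as a refinement of
AAPCS: isAAPCS_ABI() now answers true for both ABIs, while isAAPCS16_ABI()
narrows it. A standalone sketch (compressed from the patch for illustration)
of that predicate relationship:

    // Standalone sketch of the ABI predicate layering: every AAPCS16 target
    // is also an AAPCS target, so existing AAPCS code paths apply by default
    // and only 16-byte-specific spots need an explicit isAAPCS16 check.
    enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS, ARM_ABI_AAPCS16 };

    bool isAAPCS(ARMABI ABI) {
      return ABI == ARM_ABI_AAPCS || ABI == ARM_ABI_AAPCS16;
    }
    bool isAAPCS16(ARMABI ABI) { return ABI == ARM_ABI_AAPCS16; }

    int main() {
      // AAPCS16 takes the AAPCS paths unless explicitly special-cased.
      return (isAAPCS(ARM_ABI_AAPCS16) && isAAPCS16(ARM_ABI_AAPCS16) &&
              !isAAPCS16(ARM_ABI_AAPCS)) ? 0 : 1;
    }
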
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index ad546c1b14c..f95b682ba7c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -354,6 +354,7 @@ public:
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
   bool isTargetIOS() const { return TargetTriple.isiOS(); }
+  bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
   bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -391,12 +392,13 @@ public:
     // FIXME: this is invalid for WindowsCE
     return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
-           isTargetWindows();
+           isTargetWindows() || isAAPCS16_ABI();
   }
 
   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 
   bool isAPCS_ABI() const;
   bool isAAPCS_ABI() const;
+  bool isAAPCS16_ABI() const;
 
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 3e3c8646a81..9a5efc8d0b9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 static ARMBaseTargetMachine::ARMABI
 computeTargetABI(const Triple &TT, StringRef CPU,
                  const TargetOptions &Options) {
-  if (Options.MCOptions.getABIName().startswith("aapcs"))
+  if (Options.MCOptions.getABIName() == "aapcs16")
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+  else if (Options.MCOptions.getABIName().startswith("aapcs"))
     return ARMBaseTargetMachine::ARM_ABI_AAPCS;
   else if (Options.MCOptions.getABIName().startswith("apcs"))
     return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
         (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
         CPU.startswith("cortex-m")) {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+    } else if (TT.isWatchOS()) {
+      TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
     } else {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
     }
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // to 64. We always try to give them natural alignment.
   if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
     Ret += "-v64:32:64-v128:32:128";
-  else
+  else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
     Ret += "-v128:64:128";
 
   // Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
 
   // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
   // aligned everywhere else.
-  if (TT.isOSNaCl())
+  if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
     Ret += "-S128";
   else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
     Ret += "-S64";
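
The computeDataLayout changes boil down to two string components: AAPCS16
emits no v128 override, leaving 128-bit vectors at their natural 128-bit
alignment, and it requests a 128-bit-aligned stack. A simplified standalone
sketch of just those branches (the real function emits many more components,
and the final "-S32" else is implied by the "aligned everywhere else" comment
rather than shown in the hunk):

    #include <string>

    enum ARMABI { ARM_ABI_APCS, ARM_ABI_AAPCS, ARM_ABI_AAPCS16 };

    // Sketch: only the vector- and stack-alignment pieces of the data layout
    // string, following the branch structure in computeDataLayout above.
    std::string vectorAndStackLayout(ARMABI ABI, bool IsNaCl) {
      std::string Ret;
      if (ABI == ARM_ABI_APCS)
        Ret += "-v64:32:64-v128:32:128";
      else if (ABI != ARM_ABI_AAPCS16)
        Ret += "-v128:64:128"; // AAPCS16: keep natural 128-bit alignment
      if (IsNaCl || ABI == ARM_ABI_AAPCS16)
        Ret += "-S128";
      else if (ABI == ARM_ABI_AAPCS)
        Ret += "-S64";
      else
        Ret += "-S32"; // assumed default, per the comment above
      return Ret;
    }

    int main() {
      return vectorAndStackLayout(ARM_ABI_AAPCS16, false) == "-S128" ? 0 : 1;
    }
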
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 8c98e082ce9..8ad1f3dc2c3 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -26,7 +26,8 @@ public:
   enum ARMABI {
     ARM_ABI_UNKNOWN,
     ARM_ABI_APCS,
-    ARM_ABI_AAPCS // ARM EABI
+    ARM_ABI_AAPCS, // ARM EABI
+    ARM_ABI_AAPCS16
   } TargetABI;
 
 protected:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 097b683493c..d016ad8d079 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -196,7 +196,8 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
     else
       // Use CPU to figure out the exact features.
       ARMArchFeature = "+v7";
-    break; case Triple::ARMSubArch_v7:
+    break;
+  case Triple::ARMSubArch_v7:
     // v7 CPUs have lots of different feature sets. If no CPU is specified,
     // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
     // the "minimum" feature set and use CPU string to figure out the exact
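
The new test below pins the v7k ABI down end to end. Its first four functions
check field offsets in %struct; as a standalone cross-check (plain arithmetic,
not LLVM code), the expected values of 8, 24, 40 and 64 follow directly from
natural alignment of each member:

    #include <cassert>

    // Round Offset up to the next multiple of Align.
    unsigned alignTo(unsigned Offset, unsigned Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      // %struct = { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
      unsigned I64 = alignTo(1, 8);                // i8 at 0, i64 at 8
      unsigned F64 = alignTo(I64 + 8 + 1, 8);      // i8 at 16, double at 24
      unsigned V2F32 = alignTo(F64 + 8 + 1, 8);    // i8 at 32, <2 x float> at 40
      unsigned V4F32 = alignTo(V2F32 + 8 + 1, 16); // i8 at 48, <4 x float> at 64
      assert(I64 == 8 && F64 == 24 && V2F32 == 40 && V4F32 == 64);
    }
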
diff --git a/test/CodeGen/ARM/v7k-abi-align.ll b/test/CodeGen/ARM/v7k-abi-align.ll
new file mode 100644
index 00000000000..f666efc2db9
--- /dev/null
+++ b/test/CodeGen/ARM/v7k-abi-align.ll
@@ -0,0 +1,146 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
+
+%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
+
+define i32 @test_i64_align() {
+; CHECK-LABEL: test_i64_align:
+; CHECK: movs r0, #8
+  ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
+}
+
+define i32 @test_f64_align() {
+; CHECK-LABEL: test_f64_align:
+; CHECK: movs r0, #24
+  ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
+}
+
+define i32 @test_v2f32_align() {
+; CHECK-LABEL: test_v2f32_align:
+; CHECK: movs r0, #40
+  ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
+}
+
+define i32 @test_v4f32_align() {
+; CHECK-LABEL: test_v4f32_align:
+; CHECK: movs r0, #64
+  ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
+}
+
+; Key point here is that an extra register has to be saved so that the DPRs end
+; up in an aligned location (as the prologue/epilogue inserter had calculated).
+define void @test_dpr_unwind_align() {
+; CHECK-LABEL: test_dpr_unwind_align:
+; CHECK: push {r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8, d9}
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK-NOT: add sp,
+; CHECK: pop {r5, r6, r7, pc}
+
+  call void asm sideeffect "", "~{r6},~{d8},~{d9}"()
+
+  ; Whatever
+  call i32 @test_i64_align()
+  ret void
+}
+
+; This time, there's no viable way to tack CS-registers onto the list: a real
+; SP adjustment needs to be performed to put d8 and d9 where they should be.
+define void @test_dpr_unwind_align_manually() {
+; CHECK-LABEL: test_dpr_unwind_align_manually:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: push.w {r8, r11}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop.w {r8, r11}
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()
+
+  ; Whatever
+  call i32 @test_i64_align()
+  ret void
+}
+
+; If there's only a CS1 area, the sub should be in the right place:
+define void @test_dpr_unwind_align_just_cs1() {
+; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; CHECK: sub sp, #8
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #8
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()
+
+  ; Whatever
+  call i32 @test_i64_align()
+  ret void
+}
+
+; If there are no DPRs, we shouldn't try to align the stack in stages anyway.
+define void @test_dpr_unwind_align_no_dprs() {
+; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #12
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #12
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
+
+  ; Whatever
+  call i32 @test_i64_align()
+  ret void
+}
+
+; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
+; the stack.
+define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]
+
+  ret <4 x float> %in
+}
+
+declare void @varargs(i32, ...)
+
+; When varargs are enabled, we go down a different route. Still want 128-bit
+; alignment though.
+define void @test_v128_stack_pass_varargs(<4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass_varargs:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]
+
+  call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
+  ret void
+}
+
+; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
+; a single pointer), 64-bit quantities must be passed in even/odd register pairs.
+define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
+; CHECK-LABEL: test_64bit_gpr_align:
+; CHECK: ldr [[RHS:r[0-9]+]], [sp]
+; CHECK: adds r0, [[RHS]], r2
+; CHECK: adc r1, r3, #0
+
+  %ext = zext i32 %sp to i64
+  %sum = add i64 %ext, %r2_r3
+  ret i64 %sum
+}
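
In that final test, the i64 lands in the r2/r3 pair because, under this
va_start-compatible model, a 64-bit GPR value must start at an even-numbered
register; the i32 that follows then has no register left and arrives at [sp].
A standalone sketch of that allocation rule (an illustration only, not the
real calling-convention code):

    #include <cstdio>

    int main() {
      unsigned NextReg = 0;  // r0..r3 are available for arguments
      NextReg += 1;          // first i32 takes r0
      if (NextReg % 2 != 0)
        ++NextReg;           // i64 must start even: r1 is skipped, left unused
      unsigned PairReg = NextReg;
      NextReg += 2;          // i64 occupies r2/r3
      bool OnStack = NextReg >= 4; // trailing i32: no GPR left, goes to stack
      std::printf("i64 in r%u/r%u; trailing i32 %s\n", PairReg, PairReg + 1,
                  OnStack ? "at [sp]" : "in a register");
      return 0;
    }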