-//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
-// This file implements the ARM specific subclass of TargetSubtarget.
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMMachineFunctionInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallVector.h"
-
-#define GET_SUBTARGETINFO_CTOR
-#define GET_SUBTARGETINFO_MC_DESC
-#define GET_SUBTARGETINFO_TARGET_DESC
-#include "ARMGenSubtarget.inc"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
static cl::opt<bool>
-DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
-StrictAlign("arm-strict-align", cl::Hidden,
- cl::desc("Disallow all unaligned memory accesses"));
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+enum AlignMode {
+ DefaultAlign,
+ StrictAlign,
+ NoStrictAlign
+};
+
+static cl::opt<AlignMode>
+Align(cl::desc("Load/store alignment support"),
+ cl::Hidden, cl::init(DefaultAlign),
+ cl::values(
+ clEnumValN(DefaultAlign, "arm-default-align",
+ "Generate unaligned accesses only on hardware/OS "
+ "combinations that are known to support them"),
+ clEnumValN(StrictAlign, "arm-strict-align",
+ "Disallow all unaligned memory accesses"),
+ clEnumValN(NoStrictAlign, "arm-no-strict-align",
+ "Allow unaligned memory accesses"),
+ clEnumValEnd));
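+
+// For illustration (hypothetical invocation, not exercised by this patch):
+//   llc -mtriple=armv7-linux-gnueabihf -arm-strict-align foo.ll
+// forces AllowsUnalignedMem to false regardless of the defaults computed in
+// resetSubtargetFeatures() below.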
+
+enum ITMode {
+ DefaultIT,
+ RestrictedIT,
+ NoRestrictedIT
+};
+
+static cl::opt<ITMode>
+IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::ZeroOrMore,
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate IT block based on arch"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow deprecated IT based on ARMv8"),
+ clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
+ "Allow IT blocks based on ARMv7"),
+ clEnumValEnd));
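+
+// For illustration: ARMv8 deprecates IT blocks that contain more than one
+// instruction or any 32-bit instruction; -arm-restrict-it applies that rule
+// even on pre-v8 architectures (hypothetical usage, not exercised here).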
+
+static std::string computeDataLayout(ARMSubtarget &ST) {
+ std::string Ret = "";
+
+ if (ST.isLittle())
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+ // On thumb, i1, i8 and i16 have natural alignment requirements, but we try
+ // to align them to 32 bits.
+ if (ST.isThumb())
+ Ret += "-i1:8:32-i8:8:32-i16:16:32";
+
+ // ABIs other than APCS have 64 bit integers with natural alignment.
+ if (!ST.isAPCS_ABI())
+ Ret += "-i64:64";
+
+ // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
+ // bits, others to 64 bits. We always try to align to 64 bits.
+ if (ST.isAPCS_ABI())
+ Ret += "-f64:32:64";
+
+ // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits,
+ // others to 64. We always try to give them natural alignment.
+ if (ST.isAPCS_ABI())
+ Ret += "-v64:32:64-v128:32:128";
+ else
+ Ret += "-v128:64:128";
+
+ // On thumb and APCS, only try to align aggregates to 32 bits (the default is
+ // 64 bits).
+ if (ST.isThumb() || ST.isAPCS_ABI())
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+ // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
+ // aligned everywhere else.
+ if (ST.isTargetNaCl())
+ Ret += "-S128";
+ else if (ST.isAAPCS_ABI())
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
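+
+// For illustration (hypothetical target; the string is not asserted anywhere
+// in this patch): a little-endian AAPCS ELF target takes the branches above
+// and produces "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64".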
+
+/// initializeSubtargetDependencies - Initializes using a CPU and feature string
+/// so that we can use initializer lists for subtarget initialization.
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ return *this;
+}
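+
+// Note: the constructor below calls initializeSubtargetDependencies() from
+// its member initializer list, so the feature bits are already parsed when
+// computeDataLayout() inspects them to initialize DL.
+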
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool isT)
- : ARMGenSubtargetInfo()
- , ARMArchVersion(V4)
- , ARMProcFamily(Others)
- , ARMFPUType(None)
- , UseNEONForSinglePrecisionFP(false)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , IsThumb(isT)
- , ThumbMode(Thumb1)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , stackAlignment(4)
- , CPUString(CPU)
- , TargetTriple(TT)
- , TargetABI(ARM_ABI_APCS) {
- // Determine default and user specified characteristics
-
- // When no arch is specified either by CPU or by attributes, make the default
- // ARMv4T.
- const char *ARMArchFeature = "";
- if (CPUString.empty())
- CPUString = "generic";
- if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
- ARMArchVersion = V4T;
- ARMArchFeature = "+v4t";
- }
+ const std::string &FS, TargetMachine &TM,
+ bool IsLittle, const TargetOptions &Options)
+ : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
+ TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+ TSInfo(DL),
+ InstrInfo(isThumb1Only()
+ ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+ : !isThumb()
+ ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+ : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+ TLInfo(TM),
+ FrameLowering(!isThumb1Only()
+ ? new ARMFrameLowering(*this)
+ : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- unsigned Len = TT.length();
- unsigned Idx = 0;
-
- if (Len >= 5 && TT.substr(0, 4) == "armv")
- Idx = 4;
- else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
- IsThumb = true;
- if (Len >= 7 && TT[5] == 'v')
- Idx = 6;
- }
- if (Idx) {
- unsigned SubVer = TT[Idx];
- if (SubVer >= '7' && SubVer <= '9') {
- ARMArchVersion = V7A;
- ARMArchFeature = "+v7a";
- if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- ARMArchVersion = V7M;
- ARMArchFeature = "+v7m";
- }
- } else if (SubVer == '6') {
- ARMArchVersion = V6;
- ARMArchFeature = "+v6";
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
- ARMArchVersion = V6T2;
- ARMArchFeature = "+v6t2";
- }
- } else if (SubVer == '5') {
- ARMArchVersion = V5T;
- ARMArchFeature = "+v5t";
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
- ARMArchVersion = V5TE;
- ARMArchFeature = "+v5te";
- }
- } else if (SubVer == '4') {
- if (Len >= Idx+2 && TT[Idx+1] == 't') {
- ARMArchVersion = V4T;
- ARMArchFeature = "+v4t";
- } else {
- ARMArchVersion = V4;
- ARMArchFeature = "";
- }
- }
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6MOps = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasV8Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasFPARMv8 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ NoARM = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ HasVirtualization = false;
+ FPOnlySP = false;
+ HasPerfMon = false;
+ HasTrustZone = false;
+ HasCrypto = false;
+ HasCRC = false;
+ HasZeroCycleZeroing = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+ UnsafeFPMath = false;
+}
+
+void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
}
+}
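+
+// For illustration, the attributes consulted above look like this in IR
+// (hypothetical values):
+//   define void @f() #0 { ... }
+//   attributes #0 = { "target-cpu"="cortex-a9" "target-features"="+neon" }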
- if (TT.find("eabi") != std::string::npos)
- TargetABI = ARM_ABI_AAPCS;
+void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+ if (CPUString.empty()) {
+ if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else
+ CPUString = "generic";
+ }
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string FSWithArch = std::string(ARMArchFeature);
- if (FSWithArch.empty())
- FSWithArch = FS;
- else if (!FS.empty())
- FSWithArch = FSWithArch + "," + FS;
- ParseSubtargetFeatures(FSWithArch, CPUString);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
+ CPUString);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+ ParseSubtargetFeatures(CPUString, ArchFS);
+
+ // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode.
+ // Assert this for now to make the change obvious.
+ assert(hasV6T2Ops() || !hasThumb2());
+
+ // Keep a pointer to static instruction cost data for the specified CPU.
+ SchedModel = getSchedModelForCPU(CPUString);
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- // After parsing Itineraries, set ItinData.IssueWidth.
- computeIssueWidth();
+ if (TargetABI == ARM_ABI_UNKNOWN) {
+ switch (TargetTriple.getEnvironment()) {
+ case Triple::Android:
+ case Triple::EABI:
+ case Triple::EABIHF:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ TargetABI = ARM_ABI_AAPCS;
+ break;
+ default:
+ if ((isTargetIOS() && isMClass()) ||
+ (TargetTriple.isOSBinFormatMachO() &&
+ TargetTriple.getOS() == Triple::UnknownOS))
+ TargetABI = ARM_ABI_AAPCS;
+ else
+ TargetABI = ARM_ABI_APCS;
+ break;
+ }
+ }
- // Thumb2 implies at least V6T2.
- if (ARMArchVersion >= V6T2)
- ThumbMode = Thumb2;
- else if (ThumbMode >= Thumb2)
- ARMArchVersion = V6T2;
+ // FIXME: this is invalid for WindowsCE
+ if (isTargetWindows()) {
+ TargetABI = ARM_ABI_AAPCS;
+ NoARM = true;
+ }
if (isAAPCS_ABI())
stackAlignment = 8;
+ if (isTargetNaCl())
+ stackAlignment = 16;
- if (!isTargetDarwin())
- UseMovt = hasV6T2Ops();
- else {
- IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
- UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ UseMovt = hasV6T2Ops() && ArmUseMOVT;
+
+ if (isTargetMachO()) {
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
+ SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
+ } else {
+ IsR9Reserved = ReserveR9;
+ SupportsTailCall = !isThumb1Only();
}
- if (!isThumb() || hasThumb2())
- PostRAScheduler = true;
+ switch (Align) {
+ case DefaultAlign:
+ // Assume pre-ARMv6 doesn't support unaligned accesses.
+ //
+ // ARMv6 may or may not support unaligned accesses depending on the
+ // SCTLR.U bit, which is architecture-specific. We assume ARMv6
+ // Darwin and NetBSD targets support unaligned accesses, and others don't.
+ //
+ // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
+ // which raises an alignment fault on unaligned accesses. Linux
+ // defaults this bit to 0 and handles it as a system-wide (not
+ // per-process) setting. It is therefore safe to assume that ARMv7+
+ // Linux targets support unaligned accesses. The same goes for NaCl.
+ //
+ // The above behavior is consistent with GCC.
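+ //
+ // For illustration of the downstream effect (handled elsewhere, not in this
+ // file): when AllowsUnalignedMem is false, SelectionDAG legalization expands
+ // a misaligned i32 load into byte loads combined with shifts and ORs rather
+ // than emitting a single LDR.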
+ AllowsUnalignedMem =
+ (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
+ isTargetNetBSD())) ||
+ (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
+ // The one exception is cortex-m0, which despite being v6, does not
+ // support unaligned accesses. Rather than make the above boolean
+ // expression even more obtuse, just override the value here.
+ if (isThumb1Only() && isMClass())
+ AllowsUnalignedMem = false;
+ break;
+ case StrictAlign:
+ AllowsUnalignedMem = false;
+ break;
+ case NoStrictAlign:
+ AllowsUnalignedMem = true;
+ break;
+ }
+
+ switch (IT) {
+ case DefaultIT:
+ RestrictIT = hasV8Ops();
+ break;
+ case RestrictedIT:
+ RestrictIT = true;
+ break;
+ case NoRestrictedIT:
+ RestrictIT = false;
+ break;
+ }
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- if (!StrictAlign && hasV6Ops() && isTargetDarwin())
- AllowsUnalignedMem = true;
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
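+ // (In particular, NEON single-precision arithmetic flushes denormals to
+ // zero; that is the main IEEE 754 deviation, hence the unsafe-FP-math or
+ // Darwin gating below.)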
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
if (GV->isDeclaration() && !GV->isMaterializable())
isDecl = true;
- if (!isTargetDarwin()) {
+ if (!isTargetMachO()) {
// Extra load is needed for all externally visible.
if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
return false;
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- // If we have a reasonable estimate of the pipeline depth, then we can
- // estimate the penalty of a misprediction based on that.
- if (isCortexA8())
- return 13;
- else if (isCortexA9())
- return 8;
-
- // Otherwise, just return a sensible default.
- return 10;
+ return SchedModel->MispredictPenalty;
}
-void ARMSubtarget::computeIssueWidth() {
- unsigned allStage1Units = 0;
- for (const InstrItinerary *itin = InstrItins.Itineraries;
- itin->FirstStage != ~0U; ++itin) {
- const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
- allStage1Units |= IS->getUnits();
- }
- InstrItins.IssueWidth = 0;
- while (allStage1Units) {
- ++InstrItins.IssueWidth;
- // clear the lowest bit
- allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
- }
- assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
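+// The combined sin+cos runtime entry points (the __sincos_stret family,
+// assuming the usual compiler-rt/libSystem naming) are only available on
+// iOS 7.0 and later.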
+bool ARMSubtarget::hasSinCos() const {
+ return getTargetTriple().getOS() == Triple::IOS &&
+ !getTargetTriple().isOSVersionLT(7, 0);
+}
+
+// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
+bool ARMSubtarget::enablePostMachineScheduler() const {
+ return (!isThumb() || hasThumb2());
+}
+
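+// Expanding atomics to LDREX/STREX loops requires a data barrier (DMB, or
+// the older v6 MCR form covered by hasAnyDataBarrier) and an ISA that can
+// actually encode the exclusive ops, which Thumb1 cannot.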
+bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+ return hasAnyDataBarrier() && !isThumb1Only();
}
-bool ARMSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtarget::ANTIDEP_CRITICAL;
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&ARM::GPRRegClass);
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+ // NOTE: Windows on ARM needs to use movw/movt pairs to materialise 32-bit
+ // immediates as it is inherently position independent, and may be out of
+ // range otherwise.
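+ //
+ // For illustration, a movw/movt pair materialising a hypothetical global x:
+ //   movw r0, :lower16:x
+ //   movt r0, :upper16:x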
+ return UseMovt && (isTargetWindows() ||
+ !MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize));
}