//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C, ParmContext PC)
- : CCState(CC, isVarArg, MF, TM, locs, C) {
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ ParmContext PC)
+ : CCState(CC, isVarArg, MF, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
"ARMCCState users must specify whether their context is call"
"or prologue generation.");
}
// The APCS parameter registers.
-static const uint16_t GPRArgRegs[] = {
+static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new TargetLoweringObjectFileMachO();
-
+ if (TT.isOSWindows())
+ return new TargetLoweringObjectFileCOFF();
return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getRegisterInfo();
- Itins = TM.getInstrItineraryData();
+ RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
+ Itins = TM.getSubtargetImpl()->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
- Subtarget->hasARMOps()) {
+ Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
}
// These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, 0);
- setLibcallName(RTLIB::SRL_I128, 0);
- setLibcallName(RTLIB::SRA_I128, 0);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
!Subtarget->isTargetWindows()) {
- // Double-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 2
- setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
- setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
- setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
- setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
- setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
-
- // Double-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 3
- setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 4
- setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
- setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
- setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
- setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
- setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 5
- setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
-
- // Floating-point to integer conversions.
- // RTABI chapter 4.1.2, Table 6
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
-
- // Conversions between floating types.
- // RTABI chapter 4.1.2, Table 7
- setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
- setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
-
- // Integer to floating-point conversions.
- // RTABI chapter 4.1.2, Table 8
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
-
- // Long long helper functions
- // RTABI chapter 4.2, Table 9
- setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
- setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
- setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
- setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
-
- // Integer division functions
- // RTABI chapter 4.3.1
- setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
- setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
-
- // Memory operations
- // RTABI chapter 4.3.4
- setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
- setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
- setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
- setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
+
+ if (Subtarget->isTargetWindows()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
computeRegisterProperties();
- // ARM does not have f32 extending load.
+ // ARM does not have floating-point extending loads.
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ // ... or truncating stores
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
}
}
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setExceptionSelectorRegister(ARM::R1);
}
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
- // Special handling for half-precision FP.
+
+ // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
+ if (!Subtarget->hasV8Ops()) {
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ }
+
+ // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}
-
+
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const{
- const TargetRegisterClass *RRC = 0;
+ const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
// Load are scheduled for latency even if there instruction itinerary
// is not available.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
#include "ARMGenCallingConv.inc"
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
- bool Return,
- bool isVarArg) const {
+/// getEffectiveCallingConv - Get the effective calling convention, taking into
+/// account presence of floating point hardware and calling convention
+/// limitations, such as support for variadic functions.
+CallingConv::ID
+ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
- if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
- // For AAPCS ABI targets, just use VFP variant of the calling convention.
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- }
- // Fallthrough
- case CallingConv::C: {
- // Use target triple & subtarget features to do actual dispatch.
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ case CallingConv::GHC:
+ return CC;
+ case CallingConv::ARM_AAPCS_VFP:
+ return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
+ case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
- else if (Subtarget->hasVFP2() &&
+ return CallingConv::ARM_APCS;
+ else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ case CallingConv::Fast:
+ if (!Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ return CallingConv::Fast;
+ return CallingConv::ARM_APCS;
+ } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
}
- case CallingConv::ARM_AAPCS_VFP:
- if (!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- // Fallthrough
- case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (getEffectiveCallingConv(CC, isVarArg)) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::Fast:
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallResult(Ins,
CCAssignFnForNode(CallConv, /* Return*/ true,
isVarArg));
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+ unsigned id = Subtarget->isLittle() ? 0 : 1;
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
Flags));
}
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
- Ops, array_lengthof(Ops)));
+ Ops));
}
} else if (!isSibCall) {
assert(VA.isMemLoc());
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
- assert (getTargetMachine().getRelocationModel() == Reloc::Static
- && "long-calls with non-static relocation model!");
+ assert((Subtarget->isTargetWindows() ||
+ getTargetMachine().getRelocationModel() == Reloc::Static) &&
+ "long-calls with non-static relocation model!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
- DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(), false, false, true, 0);
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
+ TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
+ Callee), MachinePointerInfo::getGOT(),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
isDirect = true;
bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = Subtarget->isMinSize();
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall)
- return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext(), Call);
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext(), Call);
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
isVarArg));
}
RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
- return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
SDValue
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ bool isLittleEndian = Subtarget->isLittle();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ AFI->setReturnRegsCount(RVLocs.size());
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(1), Flag);
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 1 : 0),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
return LowerInterruptReturn(RetOps, dl, DAG);
}
- return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
+
// FIXME: is there useful debug info available here?
- TargetLowering::CallLoweringInfo CLI(Chain,
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+ 0);
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
- if (Subtarget->useMovt()) {
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
return Result;
}
+SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
+ assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ "Windows on ARM expects to use movw/movt");
+
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const ARMII::TOF TargetFlags =
+ (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
+ EVT PtrVT = getPointerTy();
+ SDValue Result;
+ SDLoc DL(Op);
+
+ ++NumMovwMovt;
+
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+ TargetFlags));
+ if (GV->hasDLLImportStorageClass())
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+ return Result;
+}
+
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_rbit: {
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "RBIT intrinsic must have i32 type!");
+ return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+ }
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
-
+ if (!Subtarget->isLittle())
+ std::swap (ArgValue, ArgValue2);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
}
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return FrameIndex;
} else {
if (ArgSize == 0) {
// If there is no regs to be stored, just point address after last
// argument passed via stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
+ 0, TotalArgRegsSaveSize);
AFI->setVarArgsFrameIndex(FrameIndex);
}
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Prologue);
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
+std::pair<SDValue, SDValue>
+ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
+ SDValue &ARMcc) const {
+ assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
+
+ SDValue Value, OverflowCmp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+
+ // FIXME: We are currently always generating CMPs because we don't support
+ // generating CMN through the backend. This is not as good as the natural
+ // CMP case because it causes a register dependency and cannot be folded
+ // later.
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::SADDO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::UADDO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::SSUBO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ case ISD::USUBO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ } // switch (...)
+
+ return std::make_pair(Value, OverflowCmp);
+}
+
+
+SDValue
+ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
+ ARMcc, CCR, OverflowCmp);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+}
+
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
SDLoc dl(Op);
+ unsigned Opc = Cond.getOpcode();
+
+ if (Cond.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO)) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
+ ARMcc, CCR, OverflowCmp);
+
+ }
// Convert:
//
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
- Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp1, 1).getValue(1);
+ Tmp1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
+ Tmp0);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
}
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ const ARMBaseRegisterInfo &ARI =
+ *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
- ? ARM::R7 : ARM::R11;
+ unsigned FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("sp", ARM::SP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
- SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ SDValue Cvt;
+ if (TLI.isBigEndian() && SrcVT.isVector() &&
+ SrcVT.getVectorNumElements() > 1)
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
+ else
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Op);
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
BitMask <<= 8;
ImmMask <<= 1;
}
+
+ if (DAG.getTargetLoweringInfo().isBigEndian())
+ // swap higher and lower 32 bit word
+ Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, MVT::i32));
- N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
return N;
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if (V2.getNode()->getOpcode() == ISD::UNDEF)
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
MVT::i32)));
}
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
LD->isNonTemporal(), LD->getAlignment());
}
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C, /*isTaillCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed*/false,
- Callee, Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args), 0)
+ .setDiscardResult();
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
};
Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
- array_lengthof(Ops));
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
OutChain = Cycles32.getValue(1);
} else {
// Intrinsic is defined to return 0 on unsupported platforms. Technically
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
- LowerGlobalAddressELF(Op, DAG);
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ return LowerXALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
}
}
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineBasicBlock *ARMTargetLowering::
EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
- const TargetRegisterClass *TRC = 0;
- const TargetRegisterClass *VecTRC = 0;
+ const TargetRegisterClass *TRC = nullptr;
+ const TargetRegisterClass *VecTRC = nullptr;
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
? (const TargetRegisterClass *)&ARM::DPairRegClass
: UnitSize == 8
? (const TargetRegisterClass *)&ARM::DPRRegClass
- : 0;
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+ // __chkstk takes the number of words to allocate on the stack in R4, and
+ // returns the stack adjustment in number of bytes in R4. This will not
+ // clober any other registers (other than the obvious lr).
+ //
+ // Although, technically, IP should be considered a register which may be
+ // clobbered, the call itself will not touch it. Windows on ARM is a pure
+ // thumb-2 environment, so there is no interworking required. As a result, we
+ // do not expect a veneer to be emitted by the linker, clobbering IP.
+ //
+ // Each module receives its own copy of __chkstk, so no import thunk is
+ // required, again, ensuring that IP is not clobbered.
+ //
+ // Finally, although some linkers may theoretically provide a trampoline for
+ // out of range calls (which is quite common due to a 32M range limitation of
+ // branches for Thumb), we can generate the long-call version via
+ // -mcmodel=large, alleviating the need for the trampoline which may clobber
+ // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP).addReg(ARM::R4)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
}
}
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
- widenType, &Ops[0], Ops.size());
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
}
// Look for the glued ADDE.
SDNode* AddeNode = AddcNode->getGluedUser();
- if (AddeNode == NULL)
+ if (!AddeNode)
return SDValue();
// Make sure it is really an ADDE.
// Figure out the high and low input values to the MLAL node.
SDValue* HiMul = &MULOp;
- SDValue* HiAdd = NULL;
- SDValue* LoMul = NULL;
- SDValue* LowAdd = NULL;
+ SDValue* HiAdd = nullptr;
+ SDValue* LoMul = nullptr;
+ SDValue* LowAdd = nullptr;
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
LowAdd = &AddcOp0;
}
- if (LoMul == NULL)
+ if (!LoMul)
return SDValue();
if (LoMul->getNode() != HiMul->getNode())
Ops.push_back(*HiAdd);
SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
- DAG.getVTList(MVT::i32, MVT::i32),
- &Ops[0], Ops.size());
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADDs' nodes uses by the MLA node's values.
SDValue HiMLALResult(MLALNode.getNode(), 1);
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+ std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
- DCI.RemoveFromWorklist(LD);
- DAG.DeleteNode(LD);
return Result;
}
SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
Increment);
Chains.push_back(Ch);
}
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
- Chains.size());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
if (!ISD::isNormalStore(St))
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(0), BasePtr,
- St->getPointerInfo(), St->isVolatile(),
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
std::min(4U, St->getAlignment() / 2));
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment(),
- St->getTBAAInfo());
+ St->getAAInfo());
}
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
// Fold obvious case.
V = V.getOperand(0);
else {
- V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops.data(), Ops.size(),
- MemInt->getMemoryVT(),
+ Ops, MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
- Ops, 2, VLDMemInt->getMemoryVT(),
+ Ops, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
+ DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
- bool *Fast) const {
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
// The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+ Fast))) {
return MVT::f64;
}
}
return true;
}
-void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
unsigned BitWidth = KnownOne.getBitWidth();
KnownZero = KnownOne = APInt(BitWidth, 0);
switch (Op.getOpcode()) {
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
+ switch (IntID) {
+ default: return;
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex: {
+ EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ return;
+ }
+ }
+ }
}
}
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1) return;
Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
SDLoc dl(Op);
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
- 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
return false;
return true;
}
+
+bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+ // Loads and stores less than 64-bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+ // anything for those.
+ bool IsMClass = Subtarget->isMClass();
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return Size == 64 && !IsMClass;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+ }
+
+ // For the real atomic operations, we have ldrex/strex up to 32 bits,
+ // and up to 64 bits on the non-M profiles
+ unsigned AtomicLimit = IsMClass ? 32 : 64;
+ return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
+}
+
+// This has so far only been implemented for MachO.
+bool ARMTargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+}
+
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}
+
+enum HABaseType {
+ HA_UNKNOWN = 0,
+ HA_FLOAT,
+ HA_DOUBLE,
+ HA_VECT64,
+ HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+ uint64_t &Members) {
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+ return false;
+ Members += SubMembers;
+ }
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+ return false;
+ Members += SubMembers * AT->getNumElements();
+ } else if (Ty->isFloatTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+ return false;
+ Members = 1;
+ Base = HA_FLOAT;
+ } else if (Ty->isDoubleTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+ return false;
+ Members = 1;
+ Base = HA_DOUBLE;
+ } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ Members = 1;
+ switch (Base) {
+ case HA_FLOAT:
+ case HA_DOUBLE:
+ return false;
+ case HA_VECT64:
+ return VT->getBitWidth() == 64;
+ case HA_VECT128:
+ return VT->getBitWidth() == 128;
+ case HA_UNKNOWN:
+ switch (VT->getBitWidth()) {
+ case 64:
+ Base = HA_VECT64;
+ return true;
+ case 128:
+ Base = HA_VECT128;
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ return (Members > 0 && Members <= 4);
+}
+
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+ CallingConv::ARM_AAPCS_VFP)
+ return false;
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool result = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ return result;
+}