#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
+ const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
LLVMContext &C, ParmContext PC)
: CCState(CC, isVarArg, MF, TM, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
}
/// Registers \p VT as a NEON type: adds it to a register class (DPair after
/// this change, QPR before) and then hands it to addTypeForNEON together with
/// MVT::v2f64 and MVT::v4i32.
// NOTE(review): the two MVT arguments are presumably the types used for
// promoted load/store and bitwise operations -- confirm against addTypeForNEON.
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
- addRegisterClass(VT, &ARM::QPRRegClass);
+ addRegisterClass(VT, &ARM::DPairRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+ if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
return new TargetLoweringObjectFileMachO();
return new ARMElfTargetObjectFile();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ if (Subtarget->getTargetTriple().isiOS() &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
+
+ // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ // Register based DivRem for AEABI (RTABI 4.2)
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+
+ setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ } else {
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ }
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetDarwin()) {
- // Non-Darwin platforms may return values in these registers via the
+ if (!Subtarget->isTargetMachO()) {
+ // Non-MachO platforms may return values in these registers via the
// personality function.
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setExceptionPointerRegister(ARM::R0);
setExceptionSelectorRegister(ARM::R1);
}
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
// Unordered/Monotonic case.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
- // Since the libcalls include locking, fold in the fences
- setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+  // Combine sin / cos into one node or libcall if possible.
+  if (Subtarget->hasSinCos()) {
+    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+    setLibcallName(RTLIB::SINCOS_F64, "sincos");
+    // Query the triple through isiOS(), matching the divmod libcall selection
+    // in this constructor, rather than comparing getOS() by hand.
+    if (Subtarget->getTargetTriple().isiOS()) {
+      // For iOS, we don't want the normal expansion of a libcall to
+      // sincos. We want to issue a libcall to __sincos_stret.
+      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+    }
+  }
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+/// Pick the exclusive load / store opcode pair for an atomic access.
+///
+/// \param Size      access width in bytes; asserted to be a power of two that
+///                  is no larger than 8.
+/// \param Ord       ordering of the atomic operation. Acquire, AcquireRelease
+///                  and SequentiallyConsistent select the LDAEX* loads;
+///                  Release, AcquireRelease and SequentiallyConsistent select
+///                  the STLEX* stores. Every other ordering gets the plain
+///                  LDREX* / STREX* forms.
+/// \param isThumb2  selects the t2* column of the tables instead of the ARM one.
+/// \param LdrOpc    [out] chosen exclusive-load opcode.
+/// \param StrOpc    [out] chosen exclusive-store opcode.
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  bool isThumb2, unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  // Each table is indexed as [log2(Size)][isThumb2].
+  static const unsigned PlainLoads[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+                                            {ARM::LDREXH, ARM::t2LDREXH},
+                                            {ARM::LDREX, ARM::t2LDREX},
+                                            {ARM::LDREXD, ARM::t2LDREXD}};
+  static const unsigned AcquireLoads[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+                                              {ARM::LDAEXH, ARM::t2LDAEXH},
+                                              {ARM::LDAEX, ARM::t2LDAEX},
+                                              {ARM::LDAEXD, ARM::t2LDAEXD}};
+  static const unsigned PlainStores[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+                                             {ARM::STREXH, ARM::t2STREXH},
+                                             {ARM::STREX, ARM::t2STREX},
+                                             {ARM::STREXD, ARM::t2STREXD}};
+  static const unsigned ReleaseStores[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+                                               {ARM::STLEXH, ARM::t2STLEXH},
+                                               {ARM::STLEX, ARM::t2STLEX},
+                                               {ARM::STLEXD, ARM::t2STLEXD}};
+
+  const bool WantsAcquire = Ord == Acquire || Ord == AcquireRelease ||
+                            Ord == SequentiallyConsistent;
+  const bool WantsRelease = Ord == Release || Ord == AcquireRelease ||
+                            Ord == SequentiallyConsistent;
+
+  const unsigned (*LoadTable)[2] = WantsAcquire ? AcquireLoads : PlainLoads;
+  const unsigned (*StoreTable)[2] = WantsRelease ? ReleaseStores : PlainStores;
+
+  const unsigned Row = Log2_32(Size);
+  LdrOpc = LoadTable[Row][isThumb2];
+  StrOpc = StoreTable[Row][isThumb2];
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
switch (Opcode) {
default: return 0;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
- case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
- case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
+ case ARMISD::VMINNM: return "ARMISD::VMINNM";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
}
}
/// Returns the result type used for a SETCC on operands of type \p VT.
/// Non-vector types yield the pointer type; vector types yield whatever
/// VT.changeVectorElementTypeToInteger() returns. The LLVMContext parameter
/// added by this change is unnamed and unused here.
-EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) return getPointerTy();
return VT.changeVectorElementTypeToInteger();
}
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const {
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
- assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32);
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+ "unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
false, false, 0);
}
-void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
- SmallVector<SDValue, 8> &MemOpChains,
+ SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- bool IsThisReturn = false;
- bool IsSibCall = false;
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
+ bool isSibCall = false;
// Disable tail calls if they're not supported.
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
- IsSibCall = true;
+ isSibCall = true;
}
}
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
- if (IsSibCall)
+ if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- if (!IsSibCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ if (!isSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && VA.getLocVT() == MVT::i32) {
- assert(!Ins.empty() && Ins[0].VT == Outs[0].VT &&
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
"unexpected use of 'returned'");
- IsThisReturn = true;
+ isThisReturn = true;
}
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+ // If the parameter does not fit entirely in the register area, "offset"
+ // lets us compute the stack slot for the remaining part correctly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
- } else if (!IsSibCall) {
+ } else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
const GlobalValue *GV = G->getGlobal();
isDirect = true;
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
- bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
- if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(getPointerTy(), dl,
- DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
- getPointerTy(), Callee, PICLabel);
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
- bool isStub = Subtarget->isTargetDarwin() &&
+ bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// tBX takes a register source operand.
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool HasMinSizeAttr = Subtarget->isMinSize();
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
- if (IsThisReturn)
- // For 'this' returns, use the R0-preserving mask
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- else
- Mask = ARI->getCallPreservedMask(CallConv);
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
- InVals, IsThisReturn,
- IsThisReturn ? OutVals[0] : SDValue());
+ InVals, isThisReturn,
+ isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+ // For in-prologue parameter handling, we also introduce a stack offset
+ // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+ // This behaviour is outside the AAPCS rules (5.5 Parameter Passing) for how
+ // the NSAA should be evaluated (NSAA means "next stacked argument address").
+ // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+ // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+ assert(NSAAOffset >= (RE-RB)*4 &&
+ "Stack offset for byval regs doesn't introduced anymore?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+ // Special case: NSAA != SP and the parameter is larger than all of the
+ // remaining GPR regs. In that case we can't split the parameter; we must
+ // send it to the stack. We must also set NCRN to R4, wasting all
+ // remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+ // The first register for the byval parameter is the first register that
+ // wasn't allocated before this method call, so it is "reg".
+ // If the parameter is small enough to fit in the range [reg, r4), then
+ // the end (one past the last) register is reg + param-size-in-regs;
+ // otherwise the parameter is split between registers and the stack, and
+ // the end register is r4.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note, the first register was already allocated at the beginning of the
+ // function; allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ // Set the remaining size to 0 when the whole structure fits
+ // in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
/// MatchingStackOffset - Return true if the given stack call argument is
if (isVarArg && !Outs.empty())
return false;
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
// local frame.
const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getVarArgsRegSaveSize())
+ if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
isVarArg));
}
+/// Build the ARMISD::INTRET_FLAG node that returns from a function carrying
+/// the "interrupt" attribute.
+///
+/// The LR adjustment implied by the attribute's value is inserted into
+/// \p RetOps at index 1 as an i32 constant, and the node is then created from
+/// the updated operand list. An unrecognised attribute value is a fatal error.
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+                                    SDLoc DL, SelectionDAG &DAG) {
+  StringRef IntKind = DAG.getMachineFunction()
+                          .getFunction()
+                          ->getFnAttribute("interrupt")
+                          .getValueAsString();
+
+  // See ARM ARM v7 B1.8.3. On exception entry LR holds the "preferred return
+  // address", possibly with an offset added. That offset determines the
+  // return instruction when returning from PL1 without hypervisor extensions:
+  //   IRQ/FIQ: +4     "subs pc, lr, #4"
+  //   SWI:      0     "subs pc, lr, #0"
+  //   ABORT:   +4     "subs pc, lr, #4"
+  //   UNDEF:   +4/+2  "subs pc, lr, #0"
+  // For UNDEF the offset depends on whether the exception came from ARM or
+  // Thumb mode. Like GCC, we give up on distinguishing them and use 0.
+  const bool LRAheadByFour = IntKind.empty() || IntKind == "IRQ" ||
+                             IntKind == "FIQ" || IntKind == "ABORT";
+  const bool LRExact = IntKind == "SWI" || IntKind == "UNDEF";
+
+  if (!LRAheadByFour && !LRExact)
+    report_fatal_error("Unsupported interrupt attribute. If present, value "
+                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+  const int64_t LROffset = LRAheadByFour ? 4 : 0;
+  SDValue OffsetOp = DAG.getConstant(LROffset, MVT::i32, false);
+  RetOps.insert(RetOps.begin() + 1, OffsetOp);
+
+  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
+                     RetOps.data(), RetOps.size());
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ SDLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
if (Flag.getNode())
RetOps.push_back(Flag);
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Chain = Copy->getOperand(0);
+ TCChain = Copy->getOperand(0);
} else {
return false;
}
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG)
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
if (CP->isMachineConstantPoolEntry())
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
SelectionDAG &DAG,
TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy();
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
/// Lowers a GlobalAddress node for Darwin.
///
/// After this change the address is always built by wrapping the target
/// global address (created with the ARMII::MO_NONLAZY flag) in
/// ARMISD::WrapperPIC when the relocation model is PIC, or ARMISD::Wrapper
/// otherwise. When Subtarget->GVIsIndirectSymbol(GV, RelocM) is true, the
/// wrapped value is used as the address of a load chained on the entry node,
/// and the loaded value is returned instead. The removed lines are the old
/// constant-pool load + PIC_ADD path and the ARMISD::WrapperDYN case.
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- // FIXME: Enable this for static codegen when tool issues are fixed. Also
- // update ARMFastISel::ARMMaterializeGV.
- if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ if (Subtarget->useMovt())
++NumMovwMovt;
- // FIXME: Once remat is capable of dealing with instructions with register
- // operands, expand this into two nodes.
- if (RelocM == Reloc::Static)
- return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
-
- unsigned Wrapper = (RelocM == Reloc::PIC_)
- ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
- SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(),
- false, false, false, 0);
- return Result;
- }
- unsigned ARMPCLabelIndex = 0;
- SDValue CPAddr;
- if (RelocM == Reloc::Static) {
- CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- } else {
- ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
- ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
- PCAdj);
- CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- }
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into multiple nodes
+ unsigned Wrapper =
+ RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue Chain = Result.getValue(1);
-
- if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
- Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
- }
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, false, 0);
-
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op); // Location built from the node being lowered; attached to the EH_SJLJ_LONGJMP node created below.
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::arm_thread_pointer: {
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
- return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(ARM_MB::ISH, MVT::i32));
+ ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); // Operand 1 is read as a constant and decoded as the fence's AtomicOrdering.
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
+ unsigned Domain = ARM_MB::ISH; // Barrier option used unless one of the cases below overrides it.
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
+ // Swift happens to implement ISHST barriers in a way that is compatible
+ // with Release semantics yet weaker than ISH, so prefer it there.
+ // Beware: other processors probably don't behave this way!
+ Domain = ARM_MB::ISHST;
+ }
+
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
+ DAG.getConstant(Domain, MVT::i32)); // The fence becomes a chained arm_dmb intrinsic node carrying the selected option.
}
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
// Just preserve the chain.
return Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize)
+ unsigned InRegsParamRecordIdx,
+ unsigned ArgSize,
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
}
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- VARegSize = NumGPRs * 4;
- VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ ArgRegsSize = NumGPRs * 4;
+
+ // If parameter is split between stack and GPRs...
+ if (NumGPRs && Align == 8 &&
+ (ArgRegsSize < ArgSize ||
+ InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
+ // Add padding for the part of the parameter recovered from GPRs, so that
+ // its last byte ends up at address K*8 - 1.
+ // This is needed because the remaining (stack) part of the parameter has
+ // stack alignment, and the "GPRs head" must be attached to it without
+ // gaps:
+ // Stack:
+ // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
+ // [ [padding] [GPRs head] ] [ Tail passed via stack ....
+ //
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned Padding =
+ ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
+ (ArgRegsSize + AFI->getArgRegsSaveSize());
+ ArgRegsSaveSize = ArgRegsSize + Padding;
+ } else
+ // We don't need to extend regs save size for byval parameters if they
+ // are passed via GPRs only.
+ ArgRegsSaveSize = ArgRegsSize;
}
// The remaining GPRs hold either the beginning of variable-argument
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- bool ForceMutable) const {
+ // Returns the frame index that the registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ SDLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ unsigned ArgSize,
+ bool ForceMutable) const {
+
+ // Currently, two use-cases are possible:
+ // Case #1. Non-var-args function, and we meet the first byval parameter.
+ // Set up the first unallocated register as the first byval register;
+ // eat all remaining registers
+ // (these two actions are performed by the HandleByVal method).
+ // Then, here, we initialize the stack frame with
+ // "store-reg" instructions.
+ // Case #2. Var-args function that doesn't contain byval parameters.
+ // The same: eat all remaining unallocated registers,
+ // initialize the stack frame.
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
- (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
- }
-
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize
- - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
+ (GPRArgRegs, array_lengthof(GPRArgRegs));
+ lastRegToSaveIndex = 4;
+ }
+
+ unsigned ArgRegsSize, ArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
+ ArgRegsSize, ArgRegsSaveSize);
+
+ // Store any by-val regs to their spots on the stack so that they may be
+ // loaded by dereferencing the formal parameter pointer or the result of
+ // va_next.
+ // Note: once the stack area for byval/varargs registers
+ // has been initialized, it can't be initialized again.
+ if (ArgRegsSaveSize) {
+
+ unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
+
+ if (Padding) {
+ assert(AFI->getStoredByValParamsPadding() == 0 &&
+ "The only parameter may be padded.");
+ AFI->setStoredByValParamsPadding(Padding);
+ }
+
+ int FrameIndex = MFI->CreateFixedObject(
+ ArgRegsSaveSize,
+ Padding + ArgOffset,
+ false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
+ return FrameIndex;
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+ return MFI->CreateFixedObject(
+ 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
+}
+
+ // Set up the stack area that the va_list pointer will start from, and record
+ // its frame index in the ARM function info.
+ void
+ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+                                         SDLoc dl, SDValue &Chain,
+                                         unsigned ArgOffset,
+                                         bool ForceMutable) const {
+   ARMFunctionInfo *FuncInfo =
+       DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+
+   // Using the in-regs record count as the record index selects no byval
+   // record, so StoreByValRegs works on the GPR argument registers that are
+   // still unallocated: it stores them to their spots on the stack so that
+   // they may be loaded by dereferencing the result of va_next.
+   // If there are no registers to store, the returned frame index just
+   // points at the address after the last argument passed via the stack.
+   const unsigned NoByValRecordIdx = CCInfo.getInRegsParamsCount();
+   const int VarArgsFI =
+       StoreByValRegs(CCInfo, DAG, dl, Chain, 0, NoByValRecordIdx, 0, ArgOffset,
+                      0, ForceMutable);
+
+   FuncInfo->setVarArgsFrameIndex(VarArgsFI);
+ }
SDValue
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+ // Initially ArgRegsSaveSize is zero.
+ // Then we increase this value each time we meet a byval parameter.
+ // We also increase this value in the case of a varargs function.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (!AFI->getVarArgsFrameIndex()) {
- VarArgStyleRegisters(CCInfo, DAG,
- dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
- int VAFrameIndex = AFI->getVarArgsFrameIndex();
- InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
- } else {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
- }
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ Flags.getByValSize(),
+ true /*force mutable frames*/);
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
+ unsigned FIOffset = VA.getLocMemOffset() +
+ AFI->getStoredByValParamsPadding();
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- VA.getLocMemOffset(), true);
+ FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset());
return Chain;
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
unsigned Opc = Cmp.getOpcode();
- DebugLoc DL = Cmp.getDebugLoc();
+ SDLoc DL(Cmp);
if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Convert:
//
SelectTrue, SelectFalse, ISD::SETNE);
}
+ /// Invert condition code \p CC for the VSEL lowering in LowerSELECT_CC.
+ /// SETNE is mapped straight to SETEQ; every other code is inverted by
+ /// ISD::getSetCCInverse (called with its second argument set to true).
+ static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+   return CC == ISD::SETNE ? ISD::SETEQ : ISD::getSetCCInverse(CC, true);
+ }
+
+ /// Rewrite a comparison so that it only needs one of the condition codes
+ /// chosen below (GE, GT, VS or EQ), reporting which operand swaps are
+ /// required to keep the overall select equivalent.
+ ///
+ /// \param CC          the ISD condition code being lowered.
+ /// \param CondCode    in/out: overwritten with GE, GT, VS or EQ for the
+ ///                    condition codes handled here; left untouched otherwise.
+ /// \param swpCmpOps   in/out: set or toggled when the compare operands have
+ ///                    to be exchanged.
+ /// \param swpVselOps  in/out: set or toggled when the VSEL operands have to
+ ///                    be exchanged.
+ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+                                  bool &swpCmpOps, bool &swpVselOps) {
+   // Classify CC once; each predicate below drives exactly one adjustment.
+   const bool TrueOnEquality = CC == ISD::SETUGE || CC == ISD::SETOGE ||
+                               CC == ISD::SETOLE || CC == ISD::SETULE;
+   const bool FalseOnEquality = CC == ISD::SETUGT || CC == ISD::SETOGT ||
+                                CC == ISD::SETOLT || CC == ISD::SETULT;
+   const bool ContainsLess = CC == ISD::SETOLE || CC == ISD::SETULE ||
+                             CC == ISD::SETOLT || CC == ISD::SETULT;
+   const bool IsUnorderedRelation = CC == ISD::SETULE || CC == ISD::SETULT ||
+                                    CC == ISD::SETUGE || CC == ISD::SETUGT;
+
+   // Opcodes that return true for 'equality' start out as GE, those that
+   // return false for 'equality' start out as GT.
+   if (TrueOnEquality)
+     CondCode = ARMCC::GE;
+   else if (FalseOnEquality)
+     CondCode = ARMCC::GT;
+
+   // Only GE/GT are available, so a 'less' opcode is expressed by swapping
+   // the compare operands.
+   if (ContainsLess)
+     swpCmpOps = true;
+
+   // GT and GE are ordered comparisons and return false for 'unordered'. An
+   // unordered opcode is handled by swapping the VSEL operands (effectively
+   // negating the condition). That negation also exchanges 'less' and
+   // 'greater', so the compare operands are swapped as well, and it flips
+   // whether 'equality' yields true, which is compensated for by switching
+   // to the opposite of the condition code picked above.
+   if (IsUnorderedRelation) {
+     swpCmpOps = !swpCmpOps;
+     swpVselOps = !swpVselOps;
+     CondCode = (CondCode == ARMCC::GT) ? ARMCC::GE : ARMCC::GT;
+   }
+
+   switch (CC) {
+   case ISD::SETO:
+     // 'ordered' is 'anything but unordered': use VS and swap the VSEL
+     // operands.
+     CondCode = ARMCC::VS;
+     swpVselOps = true;
+     break;
+   case ISD::SETUNE:
+     // 'unordered or not equal' is 'anything but equal': use EQ and swap the
+     // VSEL operands.
+     CondCode = ARMCC::EQ;
+     swpVselOps = true;
+     break;
+   default:
+     break;
+   }
+ }
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't)
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // same operands, as follows:
+ // c = fcmp [ogt, olt, ugt, ult] a, b
+ // select c, a, b
+ // We only do this in unsafe-fp-math, because signed zeros and NaNs are
+ // handled differently than the original code sequence.
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
+ RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
+
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getConstant(0, MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ return DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
- SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
PtrType, Ptr, DAG.getConstant(4, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
bool LHSSeenZero = false;
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PTy = getPointerTy();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getValueType().getVectorElementType() == MVT::i32) {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
if (VT.getVectorElementType() == MVT::f32)
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Opc;
switch (Op.getOpcode()) {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
return FrameAddr;
}
-/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
-/// and size(DestVec) > 128-bits.
-/// This is achieved by doing the one extension from the SrcVec, splitting the
-/// result, extending these parts, and then concatenating these into the
-/// destination.
-static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
- EVT DestVT = N->getValueType(0);
-
- assert(DestVT.getSizeInBits() > 128 &&
- "Custom sext/zext expansion needs >128-bit vector.");
- // If this is a normal length extension, use the default expansion.
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
- SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
- return SDValue();
-
- DebugLoc dl = N->getDebugLoc();
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
- unsigned NumElts = SrcVT.getVectorNumElements();
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
-
- EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts);
- EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts/2);
- EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
- NumElts/2);
-
- Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
- SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(0));
- SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(NumElts/2));
- ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
- ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
-}
-
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue Op = N->getOperand(0);
// This function is only supposed to be called for i64 types, either as the
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
// The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
DAG.getConstant(Intrinsic::arm_get_fpscr,
MVT::i32));
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!ST->hasV6T2Ops())
return SDValue();
/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
/// v4i16:Extracted = [k0 k1 k2 k3 ]
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
if (VT.is64BitVector()) {
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (!VT.isVector())
return SDValue();
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
SDValue CC = Op.getOperand(2);
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
- DebugLoc DL = Op.getDebugLoc();
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
+ SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+ // The rest of our options are NEON only, so make sure that's allowed before
+ // proceeding.
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST, DebugLoc dl) {
+ const ARMSubtarget *ST, SDLoc dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
if (ValueCounts.size() == 0)
return DAG.getUNDEF(VT);
- if (isOnlyLowElement)
+ // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
+ // Keep going if we are hitting this case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that's
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target) and materialization element by element
+ // on the stack followed by a load for everything else.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
+
return SDValue();
}
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
SDValue Vec = Op.getOperand(0);
if (Op.getValueType() == MVT::i32 &&
Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
"unexpected CONCAT_VECTORS");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
return false;
}
+/// Return the vector type \p OrigVT has to be extended to so that it is at
+/// least 64 bits wide. Types that are already 64 bits or wider are returned
+/// unchanged. Of the narrower types only v2i8, v2i16 (-> v2i32) and v4i8
+/// (-> v4i16) are handled; any other narrow type is treated as unreachable.
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+  const bool AlreadyWideEnough = OrigVT.getSizeInBits() >= 64;
+  if (AlreadyWideEnough)
+    return OrigVT;
+
+  assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+  switch (OrigVT.getSimpleVT().SimpleTy) {
+  case MVT::v4i8:
+    return MVT::v4i16;
+  case MVT::v2i8:
+  case MVT::v2i16:
+    return MVT::v2i32;
+  default:
+    break;
+  }
+  llvm_unreachable("Unexpected Vector Type");
+}
+
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
- EVT NewVT;
- switch (OrigSimpleTy) {
- default: llvm_unreachable("Unexpected Orig Vector Type");
- case MVT::v2i8:
- case MVT::v2i16:
- NewVT = MVT::v2i32;
- break;
- case MVT::v4i8:
- NewVT = MVT::v4i16;
- break;
- }
- return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
/// SkipLoadExtensionForVMULL - return a load of the original vector size that
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
- SDValue NonExtendingLoad =
- DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+ // The load already has the right type.
+ if (ExtendedTy == LD->getMemoryVT())
+ return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
LD->getAlignment());
- unsigned ExtOp = 0;
- switch (LD->getExtensionType()) {
- default: llvm_unreachable("Unexpected LoadExtType");
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
- }
- MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
- MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
- return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
- MemType, ExtType, ExtOp);
+
+ // We need to create a zextload/sextload. We cannot just create a load
+ // followed by a zext/sext node because LowerMUL is also run during normal
+ // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
+ LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
}
// Legalize to a VMULL instruction.
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
}
static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
}
static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
}
if (!ExtraOp)
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
- return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
+/// Custom-lower ISD::FSINCOS into a call to __sincos_stret (f64 argument) or
+/// __sincosf_stret (any other argument type). The callee writes its result
+/// pair into a stack slot that is passed as an sret pointer.
+///
+/// Returns a MERGE_VALUES node with two values of the argument type: the
+/// first is loaded from the start of the stack slot (sin), the second from
+/// the slot offset by the argument's store size (cos). Only expected to be
+/// reached on Darwin targets (asserted).
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin());
+
+  // For iOS, we want to call an alternative entry point: __sincos_stret,
+  // return values are passed via sret.
+  SDLoc dl(Op);
+  SDValue Arg = Op.getOperand(0);
+  EVT ArgVT = Arg.getValueType();
+  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Pair of floats / doubles used to pass the result.
+  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+  // Create stack object for sret.
+  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+  ArgListTy Args;
+  ArgListEntry Entry;
+
+  // First argument: pointer to the result slot, flagged sret.
+  Entry.Node = SRet;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.isSExt = false;
+  Entry.isZExt = false;
+  Entry.isSRet = true;
+  Args.push_back(Entry);
+
+  // Second argument: the value itself. Entry is reused from the sret
+  // argument above, so isSRet has to be cleared explicitly; otherwise the
+  // value argument would be flagged sret as well.
+  Entry.Node = Arg;
+  Entry.Ty = ArgTy;
+  Entry.isSExt = false;
+  Entry.isZExt = false;
+  Entry.isSRet = false;
+  Args.push_back(Entry);
+
+  const char *LibcallName = (ArgVT == MVT::f64)
+    ? "__sincos_stret" : "__sincosf_stret";
+  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+  // The call itself returns void; the results come back through the slot.
+  TargetLowering::
+  CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+                       false, false, false, false, 0,
+                       CallingConv::C, /*isTailCall=*/false,
+                       /*doesNotRet=*/false, /*isReturnValueUsed*/false,
+                       Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // Chain the first load on the call so it reads the slot after the call.
+  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+                                MachinePointerInfo(), false, false, false, 0);
+
+  // Address of cos field.
+  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+                                MachinePointerInfo(), false, false, false, 0);
+
+  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+                     LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
return SDValue();
}
-
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
- DebugLoc dl = Node->getDebugLoc();
+ SelectionDAG &DAG) {
+ SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+ for(unsigned i=2; i<Node->getNumOperands(); i++) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
}
+/// Replace the results of the ISD::READCYCLECOUNTER node \p N.
+///
+/// Pushes two values onto \p Results: an i64 BUILD_PAIR made of a 32-bit
+/// cycle count and the constant 0, followed by the output chain. When the
+/// subtarget has no performance monitors the cycle count is the constant 0.
+static void ReplaceREADCYCLECOUNTER(SDNode *N,
+                                    SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG,
+                                    const ARMSubtarget *Subtarget) {
+  SDLoc DL(N);
+  SDValue Cycles32, OutChain;
+
+  if (Subtarget->hasPerfMon()) {
+    // When the subtarget reports performance monitors, the cycle-count is:
+    //    mrc p15, #0, <Rt>, c9, c13, #0
+    SDValue Ops[] = { N->getOperand(0), // Chain
+                      DAG.getConstant(Intrinsic::arm_mrc, MVT::i32),
+                      DAG.getConstant(15, MVT::i32),
+                      DAG.getConstant(0, MVT::i32),
+                      DAG.getConstant(9, MVT::i32),
+                      DAG.getConstant(13, MVT::i32),
+                      DAG.getConstant(0, MVT::i32)
+    };
+
+    Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+                           DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
+                           array_lengthof(Ops));
+    OutChain = Cycles32.getValue(1);
+  } else {
+    // Intrinsic is defined to return 0 on unsupported platforms. Technically
+    // there are older ARM CPUs that have implementation-specific ways of
+    // obtaining this information (FIXME!).
+    Cycles32 = DAG.getConstant(0, MVT::i32);
+    // Forward the incoming chain instead of the entry node so that whatever
+    // was ordered through this node stays ordered after the replacement.
+    OutChain = N->getOperand(0);
+  }
+
+  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
+                                 Cycles32, DAG.getConstant(0, MVT::i32));
+  Results.push_back(Cycles64);
+  Results.push_back(OutChain);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
}
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
- case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+ case ISD::READCYCLECOUNTER:
+ ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
+ bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
+ unsigned offset = (isStore ? -2 : 0);
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
+ unsigned ptr = MI->getOperand(offset+2).getReg();
+ unsigned vallo = MI->getOperand(offset+3).getReg();
+ unsigned valhi = MI->getOperand(offset+4).getReg();
+ unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
// fallthrough --> exitMBB
BB = loopMBB;
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ if (!isStore) {
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
}
unsigned StoreLo, StoreHi;
// Store
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // Replace MI with the 8-byte load chosen by getExclusiveOperation, inserted before MI, then erase MI and return BB.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // Operands read from MI: 0 = low result register, 1 = high result register, 2 = pointer register, 3 = atomic ordering immediate.
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) { // Thumb2 only: constrain all three registers to rGPR.
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc; // strOpc is filled in by the helper but not used in this function.
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); // Operands are attached below, per mode.
+
+ if (isThumb2) { // Thumb2: the load defines destlo and desthi directly.
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else { // Otherwise: the load defines one GPRPair register that is split with COPYs.
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB); // Applied last, after the register operands have been attached to the load.
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+/// Map a load size in bytes to a load opcode.
+/// Sizes of 8 and above select a NEON opcode regardless of the Thumb flags
+/// (16 -> VLD1q32wb_fixed, 8 -> VLD1d32wb_fixed). Sizes 4, 2 and 1 select
+/// the Thumb1 opcode if \p IsThumb1 is set, else the Thumb2 opcode if
+/// \p IsThumb2 is set, else the ARM opcode. Any other size yields 0.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+  if (LdSize >= 8) {
+    switch (LdSize) {
+    case 16: return ARM::VLD1q32wb_fixed;
+    case 8:  return ARM::VLD1d32wb_fixed;
+    default: return 0;
+    }
+  }
+
+  if (IsThumb1) {
+    switch (LdSize) {
+    case 4:  return ARM::tLDRi;
+    case 2:  return ARM::tLDRHi;
+    case 1:  return ARM::tLDRBi;
+    default: return 0;
+    }
+  }
+
+  if (IsThumb2) {
+    switch (LdSize) {
+    case 4:  return ARM::t2LDR_POST;
+    case 2:  return ARM::t2LDRH_POST;
+    case 1:  return ARM::t2LDRB_POST;
+    default: return 0;
+    }
+  }
+
+  // ARM mode.
+  switch (LdSize) {
+  case 4:  return ARM::LDR_POST_IMM;
+  case 2:  return ARM::LDRH_POST;
+  case 1:  return ARM::LDRB_POST_IMM;
+  default: return 0;
+  }
+}
+
+/// Map a store size in bytes to a store opcode.
+/// Sizes of 8 and above select a NEON opcode regardless of the Thumb flags
+/// (16 -> VST1q32wb_fixed, 8 -> VST1d32wb_fixed). Sizes 4, 2 and 1 select
+/// the Thumb1 opcode if \p IsThumb1 is set, else the Thumb2 opcode if
+/// \p IsThumb2 is set, else the ARM opcode. Any other size yields 0.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+  if (StSize >= 8) {
+    switch (StSize) {
+    case 16: return ARM::VST1q32wb_fixed;
+    case 8:  return ARM::VST1d32wb_fixed;
+    default: return 0;
+    }
+  }
+
+  if (IsThumb1) {
+    switch (StSize) {
+    case 4:  return ARM::tSTRi;
+    case 2:  return ARM::tSTRHi;
+    case 1:  return ARM::tSTRBi;
+    default: return 0;
+    }
+  }
+
+  if (IsThumb2) {
+    switch (StSize) {
+    case 4:  return ARM::t2STR_POST;
+    case 2:  return ARM::t2STRH_POST;
+    case 1:  return ARM::t2STRB_POST;
+    default: return 0;
+    }
+  }
+
+  // ARM mode.
+  switch (StSize) {
+  case 4:  return ARM::STR_POST_IMM;
+  case 2:  return ARM::STRH_POST;
+  case 1:  return ARM::STRB_POST_IMM;
+  default: return 0;
+  }
+}
+
+/// Emit a post-increment load of LdSize bytes: Data is the register defined by the load, AddrIn the address register read, AddrOut the register defined for the updated address.
+/// The instructions will be added to BB at Pos. The opcode comes from getLdOpcode(LdSize, IsThumb1, IsThumb2) and must be non-zero (asserted).
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) { // NEON opcode: LdSize is not passed as an operand. NOTE(review): the trailing immediate 0 is presumably the address alignment operand - confirm.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // Thumb1: load from AddrIn with immediate 0, then define AddrOut from AddrIn and LdSize with a separate tADDi8.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB); // Applied before the source operands are added.
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) { // Thumb2: one instruction defining both Data and AddrOut, with LdSize as the immediate.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm: same operands as Thumb2 plus a register operand of 0 before the immediate.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store of StSize bytes: Data is the register stored, AddrIn the address register read, AddrOut the register defined for the updated address.
+/// The instructions will be added to BB at Pos. The opcode comes from getStOpcode(StSize, IsThumb1, IsThumb2) and must be non-zero (asserted).
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) { // NEON opcode: Data comes last and StSize is not passed. NOTE(review): the immediate 0 is presumably the address alignment operand - confirm.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // Thumb1: store Data to AddrIn with immediate 0, then define AddrOut from AddrIn and StSize with a separate tADDi8.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB); // Applied before the source operands are added.
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) { // Thumb2: one instruction defining AddrOut, with StSize as the immediate.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm: same operands as Thumb2 plus a register operand of 0 before the immediate.
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ // Select the register classes for the unit-size load/store; the opcodes themselves are chosen inside emitPostLd/emitPostSt.
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
+ .addReg(Vtmp).addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
}
BB->addSuccessor(loopMBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_STORE_I64:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
// Slct is now known to be the desired identity constant when CC is true.
SDValue TrueVal = OtherOp;
- SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
OtherOp, NonConstantVal);
// Unless SwapSelectOps says CC should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
CCOp, TrueVal, FalseVal);
}
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
widenType, &Ops[0], Ops.size());
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
assert(AddcNode->getNumValues() == 2 &&
AddcNode->getValueType(0) == MVT::i32 &&
- AddcNode->getValueType(1) == MVT::Glue &&
- "Expect ADDC with two result values: i32, glue");
+ "Expect ADDC with two result values. First: i32");
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
// Check that the ADDC adds the low result of the S/UMUL_LOHI.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
Ops.push_back(*LowAdd);
Ops.push_back(*HiAdd);
- SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+ SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32),
&Ops[0], Ops.size());
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
+// However, for (A + B) * (A + B),
+// vadd d2, d0, d1
+// vmul d3, d0, d2
+// vmla d3, d1, d2
+// is slower than
+// vadd d2, d0, d1
+// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
std::swap(N0, N1);
}
+ if (N0 == N1)
+ return SDValue();
+
EVT VT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
return DAG.getNode(Opcode, DL, VT,
return SDValue();
int64_t MulAmt = C->getSExtValue();
- unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDValue Res;
MulAmt >>= ShiftAmt;
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
unsigned SplatBitSize;
bool HasAnyUndefs;
+ APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
- APInt SplatBits0;
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ // Ensure that the second operands of both ANDs are constants.
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs) {
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
- APInt SplatBits1;
- if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs &&
- SplatBits0 == ~SplatBits1) {
- // Canonicalize the vector type to make instruction selection simpler.
- EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
- N0->getOperand(1), N0->getOperand(0),
- N1->getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
- }
+ HasAnyUndefs) && !HasAnyUndefs) {
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ // Ensure that the bit widths of the constants are the same and that
+ // the splat arguments are logical inverses, as per the pattern we
+ // are trying to simplify.
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection
+ // simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1),
+ N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
}
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
//
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= CountTrailingZeros_32(~Mask);
+ Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, MVT::i32),
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned amt = CountTrailingZeros_32(Mask2);
+ unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
- unsigned lsb = CountTrailingZeros_32(Mask);
+ unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(Mask);
+ unsigned LSB = countTrailingZeros(Mask);
if (ShAmtC != LSB)
return SDValue();
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- unsigned LSB = CountTrailingZeros_32(~InvMask);
- unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned LSB = countTrailingZeros(~InvMask);
+ unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
- return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
}
LoadSDNode *LD = cast<LoadSDNode>(InNode);
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = LD->getDebugLoc();
+ SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
if (Op0.getOpcode() == ARMISD::VMOVRRD &&
Op0.getNode() == Op1.getNode() &&
Op0.getResNo() == 0 && Op1.getResNo() == 1)
- return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Op0.getOperand(0));
return SDValue();
}
NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = St->getDebugLoc();
+ SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
StVal.getNode()->getOperand(0), BasePtr,
// Bitcast an i64 store extracted from a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = StVal.getDebugLoc();
+ SDLoc dl(StVal);
SDValue IntVec = StVal.getOperand(0);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
IntVec.getValueType().getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
Vec, StVal.getOperand(1));
- dl = N->getDebugLoc();
+ dl = SDLoc(N);
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
EVT VT = N->getValueType(0);
if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
return SDValue();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
+/// \brief Target-specific DAG combine transforms for ARMISD::BUILD_VECTOR.
+static SDValue
+PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
+ // At that time, we may have inserted bitcasts from integer to float.
+ // If these bitcasts have survived DAGCombine, change the lowering of this
+ // BUILD_VECTOR into something more vector friendly, i.e., something that
+ // does not force the use of floating-point types.
+
+ // Make sure we can change the type of the vector.
+ // This is possible iff:
+ // 1. The vector is only used in a bitcast to an integer type. I.e.,
+ // 1.1. Vector is used only once.
+ // 1.2. Use is a bitcast to a non-floating-point type.
+ // 2. The size of its operands is 32 bits (64-bit operands are not legal).
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+
+ // Check 1.1. and 2.
+ if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
+ return SDValue();
+
+ // By construction, the input type must be float.
+ assert(EltVT == MVT::f32 && "Unexpected type!");
+
+ // Check 1.2.
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST ||
+ Use->getValueType(0).isFloatingPoint())
+ return SDValue();
+
+ // Check profitability.
+ // Model is, if more than half of the relevant operands are bitcast from
+ // i32, turn the build_vector into a sequence of insert_vector_elt.
+ // Relevant operands are everything that is not statically
+ // (i.e., at compile time) bitcast.
+ unsigned NumOfBitCastedElts = 0;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumOfRelevantElts = NumElts;
+ for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
+ SDValue Elt = N->getOperand(Idx);
+ if (Elt->getOpcode() == ISD::BITCAST) {
+ // Assume only bitcasts whose source is i32 will go away.
+ if (Elt->getOperand(0).getValueType() == MVT::i32)
+ ++NumOfBitCastedElts;
+ } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+ // UNDEF and ConstantSDNode operands are cast statically, thus do not
+ // count them as relevant operands.
+ --NumOfRelevantElts;
+ }
+
+ // Bail out unless more than half of the relevant operands are bitcast from i32.
+ if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ // Create the new vector type: same lane count, i32 elements.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ // Check if the type is legal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VecVT))
+ return SDValue();
+
+ // Combine:
+ // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
+ // => BITCAST INSERT_VECTOR_ELT
+ // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
+ // (BITCAST EN), N.
+ SDValue Vec = DAG.getUNDEF(VecVT);
+ SDLoc dl(N);
+ for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
+ SDValue V = N->getOperand(Idx);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (V.getOpcode() == ISD::BITCAST &&
+ V->getOperand(0).getValueType() == MVT::i32)
+ // Fold the obvious case: use the i32 source of the bitcast directly.
+ V = V.getOperand(0);
+ else {
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(V.getNode());
+ }
+ SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
+ }
+ Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ return Vec;
+}
+
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
return SDValue();
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
VT.getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
!TLI.isTypeLegal(Concat1Op1.getValueType()))
return SDValue();
- SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
Op0.getOperand(0), Op1.getOperand(0));
// Translate the shuffle mask.
SmallVector<int, 16> NewMask;
NewElt = HalfElts + MaskElt - NumElts;
NewMask.push_back(NewElt);
}
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
DAG.getUNDEF(VT), NewMask.data());
}
Ops.push_back(N->getOperand(i));
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops.data(), Ops.size(),
MemInt->getMemoryVT(),
MemInt->getMemOperand());
SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
- SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
Ops, 2, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
if (EltSize > VT.getVectorElementType().getSizeInBits())
return SDValue();
- return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+ return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
// isConstVecPow2 - Return true if each vector element is a power of 2, all
!isConstVecPow2(ConstVec, isSigned, C))
return SDValue();
+ MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ // These instructions only exist converting from f32 to i32. We can handle
+ // smaller integers by generating an extra truncate, but larger ones would
+ // be lossy.
+ return SDValue();
+ }
+
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
- N->getValueType(0),
- DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
- DAG.getConstant(Log2_64(C), MVT::i32));
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
+ NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+
+ if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);
+
+ return FixConv;
}
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
!isConstVecPow2(ConstVec, isSigned, C))
return SDValue();
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ // These instructions only exist converting from i32 to f32. We can handle
+ // smaller integers by generating an extra extend, but larger ones would
+ // be lossy.
+ return SDValue();
+ }
+
+ SDValue ConvInput = Op.getOperand(0);
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ ConvInput);
+
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, MVT::i32),
- Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+ ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
}
/// Getvshiftimm - Check if this is a valid build_vector for the immediate
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
}
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2),
DAG.getConstant(Cnt, MVT::i32));
}
if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
DAG.MaskedValueIsZero(N0.getOperand(0),
APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
}
}
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
- return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
break;
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
- return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+ return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0),
DAG.getConstant(Cnt, MVT::i32));
}
}
Opc = ARMISD::VGETLANEu;
break;
}
- return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+ return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
}
}
if (!Opcode)
return SDValue();
- return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
return SDValue();
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue FalseVal = N->getOperand(0);
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
return CombineBaseUpdate(N, DCI);
+ case ARMISD::BUILD_VECTOR:
+ return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
return false;
}
+bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ // Both types are integers here; Ty1 must additionally pass isTypeLegal.
+ if (!isTypeLegal(EVT::getEVT(Ty1)))
+ return false;
+ // Sanity check: Ty1 is expected to be at most 64 bits wide at this point.
+ assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
+
+ // Assuming the caller doesn't have a zeroext or signext return parameter,
+ // truncation all the way down to i1 is valid.
+ return true;
+}
+
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+ unsigned BitWidth = KnownOne.getBitWidth();
+ KnownZero = KnownOne = APInt(BitWidth, 0);
switch (Op.getOpcode()) {
default: break;
+ case ARMISD::ADDC:
+ case ARMISD::ADDE:
+ case ARMISD::SUBC:
+ case ARMISD::SUBE:
+ // These nodes' second result is a boolean
+ if (Op.getResNo() == 0)
+ break;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ // Lower the SDIVREM/UDIVREM node to a call: pick the libcall by type and signedness.
+ RTLIB::Libcall LC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ }
+ // The call is chained to the DAG entry node.
+ SDValue InChain = DAG.getEntryNode();
+ // Forward every operand as an argument, sign- or zero-extended to match the opcode.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ // The callee is an external symbol carrying the selected libcall's name.
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy());
+ // The call's return type is a struct holding two values of type Ty.
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+
+ SDLoc dl(Op);
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
+ 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ // Hand back the first member of the pair returned by LowerCallTo.
+ return CallInfo.first;
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
- return 0;
+ return false;
+ // From here on v has at least one zero bit, so the shift counts below stay under 32.
// there can be 1's on either or both "outsides", all the "inside"
// bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ unsigned TO = CountTrailingOnes_32(v);
+ unsigned LO = CountLeadingOnes_32(v);
+ v = (v >> TO) << TO;
+ v = (v << LO) >> LO;
+ return v == 0;
}
/// isFPImmLegal - Returns true if the target can instruction select the
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_ldrex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_strex: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
case Intrinsic::arm_strexd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;