//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
-#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
-// This option should go away when tail calls fully work.
-static cl::opt<bool>
-EnableARMTailCalls("arm-tail-calls", cl::Hidden,
- cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(false));
-
cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
}
// The APCS parameter registers.
-static const uint16_t GPRArgRegs[] = {
+static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
- Subtarget->hasARMOps()) {
+ Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
+ !Subtarget->isTargetWindows()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
- // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
- // handled normally.
+ // ATOMIC_FENCE needs custom lowering; the others should have been expanded
+ // to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- // Custom lowering for 64-bit ops
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops()) {
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
}
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
-
+
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
- bool isThumb2, unsigned &LdrOpc,
- unsigned &StrOpc) {
- static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
- {ARM::LDREXH, ARM::t2LDREXH},
- {ARM::LDREX, ARM::t2LDREX},
- {ARM::LDREXD, ARM::t2LDREXD}};
- static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
- {ARM::LDAEXH, ARM::t2LDAEXH},
- {ARM::LDAEX, ARM::t2LDAEX},
- {ARM::LDAEXD, ARM::t2LDAEXD}};
- static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
- {ARM::STREXH, ARM::t2STREXH},
- {ARM::STREX, ARM::t2STREX},
- {ARM::STREXD, ARM::t2STREXD}};
- static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
- {ARM::STLEXH, ARM::t2STLEXH},
- {ARM::STLEX, ARM::t2STLEX},
- {ARM::STLEXD, ARM::t2STLEXD}};
-
- const unsigned (*LoadOps)[2], (*StoreOps)[2];
- if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- LoadOps = LoadAcqs;
- else
- LoadOps = LoadBares;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- StoreOps = StoreRels;
- else
- StoreOps = StoreBares;
-
- assert(isPowerOf2_32(Size) && Size <= 8 &&
- "unsupported size for atomic binary op!");
-
- LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
- StrOpc = StoreOps[Log2_32(Size)][isThumb2];
-}
-
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
+
// Disable tail calls if they're not supported.
- if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
+
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- // For in-prologue parameters handling, we also introduce stack offset
- // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
- // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how
- // NSAA should be evaluted (NSAA means "next stacked argument address").
- // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
- // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
- unsigned NSAAOffset = State->getNextStackOffset();
- if (State->getCallOrPrologue() != Call) {
- for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
- unsigned RB, RE;
- State->getInRegsParamInfo(i, RB, RE);
- assert(NSAAOffset >= (RE-RB)*4 &&
- "Stack offset for byval regs doesn't introduced anymore?");
- NSAAOffset -= (RE-RB)*4;
- }
- }
if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
// all remained GPR regs. In that case we can't split parameter, we must
// send it to stack. We also must set NCRN to R4, so waste all
// remained registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
while (State->AllocateReg(GPRArgRegs, 4))
;
// allocate remained amount of registers we need.
for (unsigned i = reg+1; i != ByValRegEnd; ++i)
State->AllocateReg(GPRArgRegs, 4);
- // At a call site, a byval parameter that is split between
- // registers and memory needs its size truncated here. In a
- // function prologue, such byval parameters are reassembled in
- // memory, and are not truncated.
- if (State->getCallOrPrologue() == Call) {
- // Make remained size equal to 0 in case, when
- // the whole structure may be stored into registers.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
+ if (!Subtarget->supportsTailCall())
return false;
- if (!CI->isTailCall())
+ if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
return !Subtarget->isThumb1Only();
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
- if (NumGPRs && Align == 8 &&
+ if (NumGPRs && Align > 4 &&
(ArgRegsSize < ArgSize ||
InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs, so
- // its last byte must be at address K*8 - 1.
+ // Add padding for part of param recovered from GPRs. For example,
+ // if Align == 8, its last byte must be at address K*8 - 1.
// We need to do it, since remained (stack) part of parameter has
// stack alignment, and we need to "attach" "GPRs head" without gaps
// to it:
//
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Padding =
- ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
- (ArgRegsSize + AFI->getArgRegsSaveSize());
+ OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
ArgRegsSaveSize = ArgRegsSize + Padding;
} else
// We don't need to extend regs save size for byval parameters if they
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
unsigned ArgSize,
- bool ForceMutable) const {
+ bool ForceMutable,
+ unsigned ByValStoreOffset,
+ unsigned TotalArgRegsSaveSize) const {
// Currently, two use-cases possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Note: once stack area for byval/varargs registers
// was initialized, it can't be initialized again.
if (ArgRegsSaveSize) {
-
unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
if (Padding) {
AFI->setStoredByValParamsPadding(Padding);
}
- int FrameIndex = MFI->CreateFixedObject(
- ArgRegsSaveSize,
- Padding + ArgOffset,
- false);
+ int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
+ Padding +
+ ByValStoreOffset -
+ (int64_t)TotalArgRegsSaveSize,
+ false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
+ if (Padding) {
+ MFI->CreateFixedObject(Padding,
+ ArgOffset + ByValStoreOffset -
+ (int64_t)ArgRegsSaveSize,
+ false);
+ }
SmallVector<SDValue, 4> MemOps;
for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
return FrameIndex;
- } else
+ } else {
+ if (ArgSize == 0) {
+ // We cannot allocate a zero-byte object for the first variadic argument,
+ // so just make up a size.
+ ArgSize = 4;
+ }
// This will point to the next argument passed via stack.
return MFI->CreateFixedObject(
- 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
+ ArgSize, ArgOffset, !ForceMutable);
+ }
}
// Setup stack frame, the va_list pointer will start from.
ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// argument passed via stack.
int FrameIndex =
StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, 0, ForceMutable);
+ 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
AFI->setVarArgsFrameIndex(FrameIndex);
}
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
+ unsigned ByValStoreOffset = 0;
+ unsigned TotalArgRegsSaveSize = 0;
+ unsigned ArgRegsSaveSizeMaxAlign = 4;
+
+ // Calculate the amount of stack space that we need to allocate to store
+ // byval and variadic arguments that are passed in registers.
+ // We need to know this before we allocate the first byval or variadic
+ // argument, as they will be allocated a stack slot below the CFA (Canonical
+ // Frame Address, the stack pointer at entry to the function).
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ int index = VA.getValNo();
+ if (index != lastInsIndex) {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ if (Flags.isByVal()) {
+ unsigned ExtraArgRegsSize;
+ unsigned ExtraArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+ Flags.getByValSize(),
+ ExtraArgRegsSize, ExtraArgRegsSaveSize);
+
+ TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
+ ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
+ CCInfo.nextInRegsParam();
+ }
+ lastInsIndex = index;
+ }
+ }
+ }
+ CCInfo.rewindByValRegsInfo();
+ lastInsIndex = -1;
+ if (isVarArg) {
+ unsigned ExtraArgRegsSize;
+ unsigned ExtraArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
+ ExtraArgRegsSize, ExtraArgRegsSaveSize);
+ TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ }
+ // If the arg regs save area contains N-byte aligned values, the
+ // bottom of it must be at least N-byte aligned.
+ TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
+ TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
// a tail call.
if (Flags.isByVal()) {
unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+
+ ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
int FrameIndex = StoreByValRegs(
CCInfo, DAG, dl, Chain, CurOrigArg,
CurByValIndex,
Ins[VA.getValNo()].PartOffset,
VA.getLocMemOffset(),
Flags.getByValSize(),
- true /*force mutable frames*/);
+ true /*force mutable frames*/,
+ ByValStoreOffset,
+ TotalArgRegsSaveSize);
+ ByValStoreOffset += Flags.getByValSize();
+ ByValStoreOffset = std::min(ByValStoreOffset, 16U);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
- unsigned FIOffset = VA.getLocMemOffset() +
- AFI->getStoredByValParamsPadding();
+ unsigned FIOffset = VA.getLocMemOffset();
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
FIOffset, true);
// varargs
if (isVarArg)
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
- CCInfo.getNextStackOffset());
+ CCInfo.getNextStackOffset(),
+ TotalArgRegsSaveSize);
+
+ AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
return Chain;
}
// Value = 0x0000nnff: Op=x, Cmode=1100.
OpCmode = 0xc;
Imm = SplatBits >> 8;
- SplatBits |= 0xff;
break;
}
// Value = 0x00nnffff: Op=x, Cmode=1101.
OpCmode = 0xd;
Imm = SplatBits >> 16;
- SplatBits |= 0xffff;
break;
}
}
// Op=1, Cmode=1110.
OpCmode = 0x1e;
- SplatBits = Val;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
return SDValue();
}
-static void
-ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(0)); // Chain
- Ops.push_back(Node->getOperand(1)); // Ptr
- for(unsigned i=2; i<Node->getNumOperands(); i++) {
- // Low part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(0)));
- // High part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(1)));
- }
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result =
- DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
- cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
- AN->getSynchScope());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
- Results.push_back(Result.getValue(2));
-}
-
static void ReplaceREADCYCLECOUNTER(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
- case ISD::ATOMIC_STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG);
- return;
}
if (Res.getNode())
Results.push_back(Res);
// ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned oldval = MI->getOperand(2).getReg();
- unsigned newval = MI->getOperand(3).getReg();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
-
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
- MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loop1MBB);
- MF->insert(It, loop2MBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // thisMBB:
- // ...
- // fallthrough --> loop1MBB
- BB->addSuccessor(loop1MBB);
-
- // loop1MBB:
- // ldrex dest, [ptr]
- // cmp dest, oldval
- // bne exitMBB
- BB = loop1MBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(dest).addReg(oldval));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop2MBB);
- BB->addSuccessor(exitMBB);
-
- // loop2MBB:
- // strex scratch, newval, [ptr]
- // cmp scratch, #0
- // bne loop1MBB
- BB = loop2MBB;
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // <binop> scratch2, dest, incr
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- if (BinOpcode) {
- // operand order needs to go the other way for NAND
- if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(incr).addReg(dest)).addReg(0);
- else
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(dest).addReg(incr)).addReg(0);
- }
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- bool signExtend,
- ARMCC::CondCodes Cond) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- unsigned oldval = dest;
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc, extendOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
- case 1:
- extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
- break;
- case 2:
- extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
- break;
- case 4:
- extendOpc = 0;
- break;
- }
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // (sign extend dest, if required)
- // cmp dest, incr
- // cmov.cond scratch2, incr, dest
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Sign extend the value, if necessary.
- if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
- : &ARM::GPRnopcRegClass);
- if (!isThumb2)
- MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
- .addReg(dest)
- .addImm(0));
- }
-
- // Build compare and cmov instructions.
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(oldval).addReg(incr));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg,
- bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
- unsigned offset = (isStore ? -2 : 0);
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(offset+2).getReg();
- unsigned vallo = MI->getOperand(offset+3).getReg();
- unsigned valhi = MI->getOperand(offset+4).getReg();
- unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg || IsMinMax)
- contBB = MF->CreateMachineBasicBlock(LLVM_BB);
- if (IsCmpxchg)
- cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-
- MF->insert(It, loopMBB);
- if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
- if (IsCmpxchg) MF->insert(It, cont2BB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned storesuccess = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrexd r2, r3, ptr
- // <binopa> r0, r2, incr
- // <binopb> r1, r3, incr
- // strexd storesuccess, r0, r1, ptr
- // cmp storesuccess, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
-
- if (!isStore) {
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
- }
-
- unsigned StoreLo, StoreHi;
- if (IsCmpxchg) {
- // Add early exit
- for (unsigned i = 0; i < 2; i++) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
- ARM::CMPrr))
- .addReg(i == 0 ? destlo : desthi)
- .addReg(i == 0 ? vallo : valhi));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(i == 0 ? contBB : cont2BB);
- BB = (i == 0 ? contBB : cont2BB);
- }
-
- // Copy to physregs for strexd
- StoreLo = MI->getOperand(5).getReg();
- StoreHi = MI->getOperand(6).getReg();
- } else if (Op1) {
- // Perform binary operation
- unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
- .addReg(destlo).addReg(vallo))
- .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
- .addReg(desthi).addReg(valhi))
- .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
-
- StoreLo = tmpRegLo;
- StoreHi = tmpRegHi;
- } else {
- // Copy to physregs for strexd
- StoreLo = vallo;
- StoreHi = valhi;
- }
- if (IsMinMax) {
- // Compare and branch to exit block.
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(contBB);
- BB = contBB;
- StoreLo = vallo;
- StoreHi = valhi;
- }
-
- // Store
- if (isThumb2) {
- MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
- } else {
- // Marshal a pair...
- unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(StoreLo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
- .addReg(r1)
- .addReg(StoreHi)
- .addImm(ARM::gsub_1);
-
- // ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StorePair).addReg(ptr));
- }
- // Cmp+jump
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(storesuccess).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
-
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- }
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
-
- if (isThumb2) {
- MIB.addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr);
-
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
-
- // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
- AddDefaultPred(MIB);
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
MI->eraseFromParent();
return BB;
}
- case ARM::ATOMIC_LOAD_ADD_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
-
- case ARM::ATOMIC_LOAD_AND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
-
- case ARM::ATOMIC_LOAD_OR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
-
- case ARM::ATOMIC_LOAD_XOR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
-
- case ARM::ATOMIC_LOAD_NAND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
-
- case ARM::ATOMIC_LOAD_SUB_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
-
- case ARM::ATOMIC_LOAD_MIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
-
- case ARM::ATOMIC_LOAD_MAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
-
- case ARM::ATOMIC_LOAD_UMIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
-
- case ARM::ATOMIC_LOAD_UMAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
-
- case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
- case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
- case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
-
- case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
- case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
- case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
-
- case ARM::ATOMIC_LOAD_I64:
- return EmitAtomicLoad64(MI, BB);
-
- case ARM::ATOMIC_LOAD_ADD_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
- isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_SUB_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_OR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
- isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_XOR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
- isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_AND_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
- isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_STORE_I64:
- case ARM::ATOMIC_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMIC_CMP_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMIC_LOAD_MIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMIC_LOAD_UMIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(copy0MBB);
case ARM::BCCi64:
case ARM::BCCZi64: {
// If there is an unconditional branch to the other successor, remove it.
- BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
// Compare both parts that make up the double comparison separately for
// equality.
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RSBBB);
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
- switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+
+ EVT inputLaneType = Vec.getValueType().getVectorElementType();
+ switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
widenType, &Ops[0], Ops.size());
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
+ unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
// Fold obvious case.
V = V.getOperand(0);
else {
- V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
KnownOne &= KnownOneRHS;
return;
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
+ switch (IntID) {
+ default: return;
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex: {
+ EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ return;
+ }
+ }
+ }
}
}
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.writeMem = false;
return true;
}
+ case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.writeMem = true;
return true;
}
+ case Intrinsic::arm_ldaexd:
case Intrinsic::arm_ldrexd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
return false;
return true;
}
+
+bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+ // Loads and stores less than 64-bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+ // For the real atomic operations, we have ldrex/strex up to 64 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
+
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}