namespace llvm {
-class TargetLowering;
class GlobalVariable;
+class TargetLowering;
+class SDNode;
+class SelectionDAG;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
/// of insertvalue or extractvalue indices that identify a member, return
bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
const TargetLowering &TLI);
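+
+/// isInTailCallPosition - Determine whether a libcall, represented by the
+/// given SelectionDAG node, is in a position to be emitted as a tail call:
+/// the caller's return attributes must be compatible with the call, and the
+/// node's result must be used only by a return node.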
+bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI);
+
} // End llvm namespace
#endif
return SDValue(); // this is here to silence compiler errors
}
+  /// isUsedByReturnOnly - Return true if the result of the specified node is
+  /// used by a return node only. This is used to determine whether it is
+  /// possible to codegen a libcall as a tail call at legalization time.
+ virtual bool isUsedByReturnOnly(SDNode *N) const {
+ return false;
+ }
+
/// LowerOperationWrapper - This callback is invoked by the type legalizer
/// to legalize nodes with an illegal operand type but legal result types.
/// It replaces the LowerOperation callback in the type Legalizer.
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
return true;
}
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+  // The libcall does not reference the caller's stack frame, so it may be
+  // emitted as a tail call. Check whether it is in the right position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+    // It's a tail call, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == ARM::TCRETURNdiND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
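+  // The direct tail-call pseudos may target a global address or, for a
+  // tail-called libcall, an external symbol.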
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
// whether LR is going to be used. Probably the right approach is to
// generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
- if (Subtarget->isThumb1Only())
- return false;
-
- // For the moment, we can only do this to functions defined in this
- // compilation, or to indirect calls. A Thumb B to an ARM function,
- // or vice versa, is not easily fixed up in the linker unlike BL.
- // (We could do this by loading the address of the callee into a register;
- // that is an extra instruction over the direct call and burns a register
- // as well, so is not likely to be a win.)
-
- // It might be safe to remove this restriction on non-Darwin.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
// registers are the 4 used for parameters. We don't currently handle this
// case.
- if (isa<ExternalSymbolSDNode>(Callee))
- return false;
-
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- if (GV->isDeclaration() || GV->isWeakForLinker())
- return false;
- }
+ if (Subtarget->isThumb1Only())
+ return false;
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
return result;
}
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
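+  // The result can reach the return in one of three ways: directly via a
+  // CopyToReg, as an f64 split into a GPR pair by VMOVRRD, or as an f32
+  // bitcast to i32 and then copied to a GPR.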
+ unsigned NumCopies = 0;
+  SDNode *Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
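+  // Each collected copy may only be used by the other collected copy or by
+  // the return node itself.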
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ }
+ }
+
+ return true;
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
//===---------------------------------------------------------------------===//
-Linux is missing some basic tail call support:
-
-#include <math.h>
-double foo(double a) { return sin(a); }
-
-This compiles into this on x86-64 Linux (but not darwin):
-foo:
- subq $8, %rsp
- call sin
- addq $8, %rsp
- ret
-vs:
-
-foo:
- jmp sin
-
-//===---------------------------------------------------------------------===//
-
Tail call optimization improvements: Tail call optimization currently
pushes all arguments on the top of the stack (their normal place for
non-tail call optimized calls) that source from the caller's arguments
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
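+    // The direct target may be a global address or, for a tail-called
+    // libcall, an external symbol.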
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
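+  // The sole use must be a CopyToReg whose users are all X86 return nodes.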
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg)
+ return false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI)
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+
+ return true;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = X86II::MO_DARWIN_STUB;
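+    // Tail calls reference the symbol directly, so the PLT and $stub flags
+    // are only applied to normal calls.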
+ if (!isTailCall) {
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
-; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
-; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
-; XFAIL: *
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
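+; Tail call optimization on ARM is enabled by the -arm-tail-calls flag used
+; in the RUN lines above.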
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
define void @t1() {
; CHECKELF: t1:
-; CHECKELF: PLT
+; CHECKELF: bl g(PLT)
call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
define void @t2() {
-; CHECKV4: t2:
-; CHECKV4: bx r0 @ TAILCALL
-; CHECKV5: t2:
-; CHECKV5: bx r0 @ TAILCALL
+; CHECKV6: t2:
+; CHECKV6: bx r0 @ TAILCALL
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
-define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: t3:
-; CHECKV4: bx r{{.*}}
-BB0:
- %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = volatile load i32* %5 ; <i32> [#uses=1]
- %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
- %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
- %8 = load i32** %7 ; <i32*> [#uses=1]
- %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
- %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
- ret i32* %10
-}
-
-define void @t4() {
-; CHECKV4: t4:
-; CHECKV4: b _t2 @ TAILCALL
-; CHECKV5: t4:
-; CHECKV5: b _t2 @ TAILCALL
+define void @t3() {
+; CHECKV6: t3:
+; CHECKV6: b _t2 @ TAILCALL
+; CHECKELF: t3:
+; CHECKELF: b t2(PLT) @ TAILCALL
tail call void @t2( ) ; <i32> [#uses=0]
ret void
}
+
+; Sibcall optimization of expanded libcalls. rdar://8707777
+define double @t4(double %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t4:
+; CHECKV6: b _sin @ TAILCALL
+; CHECKELF: t4:
+; CHECKELF: b sin(PLT) @ TAILCALL
+ %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
+ ret double %0
+}
+
+define float @t5(float %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t5:
+; CHECKV6: b _sinf @ TAILCALL
+; CHECKELF: t5:
+; CHECKELF: b sinf(PLT) @ TAILCALL
+ %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
+
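+; sdiv is expanded to a runtime libcall (___divsi3 on Darwin, __aeabi_idiv
+; on EABI), which should also be emitted as a tail call.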
+define i32 @t6(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECKV6: t6:
+; CHECKV6: b ___divsi3 @ TAILCALL
+; CHECKELF: t6:
+; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
+ %0 = sdiv i32 %a, %b
+ ret i32 %0
+}
--- /dev/null
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Sibcall optimization of expanded libcalls.
+; rdar://8707777
+
+define double @foo(double %a) nounwind readonly ssp {
+entry:
+; CHECK: foo:
+; CHECK: jmp {{_?}}sin
+ %0 = tail call double @sin(double %a) nounwind readonly
+ ret double %0
+}
+
+define float @bar(float %a) nounwind readonly ssp {
+; CHECK: bar:
+; CHECK: jmp {{_?}}sinf
+entry:
+ %0 = tail call float @sinf(float %a) nounwind readonly
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly