namespace llvm {
-class TargetLowering;
class GlobalVariable;
+class TargetLowering;
+class SDNode;
+class SelectionDAG;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
/// of insertvalue or extractvalue indices that identify a member, return
bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
const TargetLowering &TLI);
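+
+/// isInTailCallPosition - Determine whether a libcall, represented by the
+/// given SelectionDAG node, is in a position to be emitted as a tail call:
+/// the caller's return attributes must be compatible with the call, and the
+/// node's result must be used only by a return node.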
+bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI);
+
} // End llvm namespace
#endif
return SDValue(); // this is here to silence compiler errors
}
+  /// isUsedByReturnOnly - Return true if the result of the specified node is
+  /// used by a return node only. This is used to determine whether it is
+  /// possible to codegen a libcall as a tail call at legalization time.
+ virtual bool isUsedByReturnOnly(SDNode *N) const {
+ return false;
+ }
+
/// LowerOperationWrapper - This callback is invoked by the type legalizer
/// to legalize nodes with an illegal operand type but legal result types.
/// It replaces the LowerOperation callback in the type Legalizer.
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
return true;
}
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+  // The libcall does not reference the caller's stack frame, so it may be
+  // emitted as a tail call. Check whether it is in the right position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+    // It's a tail call, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == ARM::TCRETURNdiND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
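+  // The direct tail-call pseudos may target a global address or, for a
+  // tail-called libcall, an external symbol.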
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
// whether LR is going to be used. Probably the right approach is to
// generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
- if (Subtarget->isThumb1Only())
- return false;
-
- // For the moment, we can only do this to functions defined in this
- // compilation, or to indirect calls. A Thumb B to an ARM function,
- // or vice versa, is not easily fixed up in the linker unlike BL.
- // (We could do this by loading the address of the callee into a register;
- // that is an extra instruction over the direct call and burns a register
- // as well, so is not likely to be a win.)
-
- // It might be safe to remove this restriction on non-Darwin.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
// registers are the 4 used for parameters. We don't currently handle this
// case.
- if (isa<ExternalSymbolSDNode>(Callee))
- return false;
-
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- if (GV->isDeclaration() || GV->isWeakForLinker())
- return false;
- }
+ if (Subtarget->isThumb1Only())
+ return false;
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
return result;
}
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
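+  // The result can reach the return in one of three ways: directly via a
+  // CopyToReg, as an f64 split into a GPR pair by VMOVRRD, or as an f32
+  // bitcast to i32 and then copied to a GPR.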
+ unsigned NumCopies = 0;
+  SDNode *Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
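+  // Each collected copy may only be used by the other collected copy or by
+  // the return node itself.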
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ }
+ }
+
+ return true;
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
//===---------------------------------------------------------------------===//
-Linux is missing some basic tail call support:
-
-#include <math.h>
-double foo(double a) { return sin(a); }
-
-This compiles into this on x86-64 Linux (but not darwin):
-foo:
- subq $8, %rsp
- call sin
- addq $8, %rsp
- ret
-vs:
-
-foo:
- jmp sin
-
-//===---------------------------------------------------------------------===//
-
Tail call optimization improvements: Tail call optimization currently
pushes all arguments on the top of the stack (their normal place for
non-tail call optimized calls) that source from the caller's arguments
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
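+    // The direct target may be a global address or, for a tail-called
+    // libcall, an external symbol.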
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
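+  // The sole use must be a CopyToReg whose users are all X86 return nodes.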
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg)
+ return false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI)
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+
+ return true;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = X86II::MO_DARWIN_STUB;
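+    // Tail calls reference the symbol directly, so the PLT and $stub flags
+    // are only applied to normal calls.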
+ if (!isTailCall) {
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ Subtarget->getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
-; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
-; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
-; XFAIL: *
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
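+; Tail call optimization on ARM is enabled by the -arm-tail-calls flag used
+; in the RUN lines above.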
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
define void @t1() {
; CHECKELF: t1:
-; CHECKELF: PLT
+; CHECKELF: bl g(PLT)
call void @g( i32 1, i32 2, i32 3, i32 4 )
ret void
}
define void @t2() {
-; CHECKV4: t2:
-; CHECKV4: bx r0 @ TAILCALL
-; CHECKV5: t2:
-; CHECKV5: bx r0 @ TAILCALL
+; CHECKV6: t2:
+; CHECKV6: bx r0 @ TAILCALL
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
-define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: t3:
-; CHECKV4: bx r{{.*}}
-BB0:
- %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = volatile load i32* %5 ; <i32> [#uses=1]
- %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
- %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
- %8 = load i32** %7 ; <i32*> [#uses=1]
- %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
- %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
- ret i32* %10
-}
-
-define void @t4() {
-; CHECKV4: t4:
-; CHECKV4: b _t2 @ TAILCALL
-; CHECKV5: t4:
-; CHECKV5: b _t2 @ TAILCALL
+define void @t3() {
+; CHECKV6: t3:
+; CHECKV6: b _t2 @ TAILCALL
+; CHECKELF: t3:
+; CHECKELF: b t2(PLT) @ TAILCALL
tail call void @t2( ) ; <i32> [#uses=0]
ret void
}
+
+; Sibcall optimization of expanded libcalls. rdar://8707777
+define double @t4(double %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t4:
+; CHECKV6: b _sin @ TAILCALL
+; CHECKELF: t4:
+; CHECKELF: b sin(PLT) @ TAILCALL
+ %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
+ ret double %0
+}
+
+define float @t5(float %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t5:
+; CHECKV6: b _sinf @ TAILCALL
+; CHECKELF: t5:
+; CHECKELF: b sinf(PLT) @ TAILCALL
+ %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
+
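+; sdiv is expanded to a runtime libcall (___divsi3 on Darwin, __aeabi_idiv
+; on EABI), which should also be emitted as a tail call.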
+define i32 @t6(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECKV6: t6:
+; CHECKV6: b ___divsi3 @ TAILCALL
+; CHECKELF: t6:
+; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
+ %0 = sdiv i32 %a, %b
+ ret i32 %0
+}
--- /dev/null
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Sibcall optimization of expanded libcalls.
+; rdar://8707777
+
+define double @foo(double %a) nounwind readonly ssp {
+entry:
+; CHECK: foo:
+; CHECK: jmp {{_?}}sin
+ %0 = tail call double @sin(double %a) nounwind readonly
+ ret double %0
+}
+
+define float @bar(float %a) nounwind readonly ssp {
+; CHECK: bar:
+; CHECK: jmp {{_?}}sinf
+entry:
+ %0 = tail call float @sinf(float %a) nounwind readonly
+ ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly