From: Evan Cheng
Date: Fri, 5 Feb 2010 02:21:12 +0000 (+0000)
Subject: Handle tail call with byval arguments.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5f94193b366435079217dfc34e30241575907cbd;p=oota-llvm.git

Handle tail call with byval arguments.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@95351 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 40f720b168b..6b24fec8a43 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1790,11 +1790,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   MachineFunction &MF = DAG.getMachineFunction();
   bool Is64Bit = Subtarget->is64Bit();
   bool IsStructRet = CallIsStructReturn(Outs);
+  bool IsSibcall = false;
 
-  if (isTailCall)
+  if (isTailCall) {
     // Check if it's really possible to do a tail call.
     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                    Outs, Ins, DAG);
+    if (!PerformTailCallOpt && isTailCall)
+      IsSibcall = true;
+  }
 
   assert(!(isVarArg && CallConv == CallingConv::Fast) &&
          "Var args not supported with calling convention fastcc");
@@ -1809,7 +1813,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   unsigned NumBytes = CCInfo.getNextStackOffset();
   if (FuncIsMadeTailCallSafe(CallConv))
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
-  else if (isTailCall && !PerformTailCallOpt)
+  else if (IsSibcall)
     // This is a sibcall. The memory operands are available in the caller's
     // own incoming argument stack.
     NumBytes = 0;
@@ -1884,15 +1888,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-    } else {
-      if (!isTailCall || (isTailCall && isByVal)) {
-        assert(VA.isMemLoc());
-        if (StackPtr.getNode() == 0)
-          StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
-
-        MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
-                                               dl, DAG, VA, Flags));
-      }
+    } else if ((!isTailCall || isByVal) && !IsSibcall) {
+      assert(VA.isMemLoc());
+      if (StackPtr.getNode() == 0)
+        StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
+                                             dl, DAG, VA, Flags));
     }
   }
 
@@ -2245,6 +2246,50 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
   return Offset;
 }
 
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+                         const X86InstrInfo *TII) {
+  int FI;
+  if (Arg.getOpcode() == ISD::CopyFromReg) {
+    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+      return false;
+    MachineInstr *Def = MRI->getVRegDef(VR);
+    if (!Def)
+      return false;
+    if (!Flags.isByVal()) {
+      if (!TII->isLoadFromStackSlot(Def, FI))
+        return false;
+    } else {
+      unsigned Opcode = Def->getOpcode();
+      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
+          Def->getOperand(1).isFI()) {
+        FI = Def->getOperand(1).getIndex();
+        if (MFI->getObjectSize(FI) != Flags.getByValSize())
+          return false;
+      } else
+        return false;
+    }
+  } else {
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg);
+    if (!Ld)
+      return false;
+    SDValue Ptr = Ld->getBasePtr();
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+    if (!FINode)
+      return false;
+    FI = FINode->getIndex();
+  }
+
+  if (!MFI->isFixedObjectIndex(FI))
+    return false;
+  return Offset == MFI->getObjectOffset(FI);
+}
+
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
@@ -2295,27 +2340,19 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     // Check if the arguments are already laid out in the right way as
     // the caller's fixed stack objects.
     MachineFrameInfo *MFI = MF.getFrameInfo();
+    const MachineRegisterInfo *MRI = &MF.getRegInfo();
+    const X86InstrInfo *TII =
+      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
       CCValAssign &VA = ArgLocs[i];
       EVT RegVT = VA.getLocVT();
       SDValue Arg = Outs[i].Val;
       ISD::ArgFlagsTy Flags = Outs[i].Flags;
-      if (Flags.isByVal())
-        return false; // TODO
       if (VA.getLocInfo() == CCValAssign::Indirect)
         return false;
       if (!VA.isRegLoc()) {
-        LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg);
-        if (!Ld)
-          return false;
-        SDValue Ptr = Ld->getBasePtr();
-        FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
-        if (!FINode)
-          return false;
-        int FI = FINode->getIndex();
-        if (!MFI->isFixedObjectIndex(FI))
-          return false;
-        if (VA.getLocMemOffset() != MFI->getObjectOffset(FI))
+        if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+                                 MFI, MRI, TII))
           return false;
       }
     }
diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll
index 46fb7c3ef19..b2fe9d3bb91 100644
--- a/test/CodeGen/X86/tailcall2.ll
+++ b/test/CodeGen/X86/tailcall2.ll
@@ -70,7 +70,7 @@ define i32 @t6(i32 %x) nounwind ssp {
 entry:
 ; 32: t6:
 ; 32: call {{_?}}t6
-; 32: call {{_?}}bar
+; 32: jmp {{_?}}bar
 
 ; 64: t6:
 ; 64: jmp {{_?}}t6
@@ -140,3 +140,48 @@ entry:
 }
 
 declare i32 @foo4()
+
+define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
+; In 32-bit mode, it's emitting a bunch of dead loads that are not being
+; eliminated currently.
+
+; 32: t11:
+; 32: jmp {{_?}}foo5
+
+; 64: t11:
+; 64: jmp {{_?}}foo5
+entry:
+  %0 = icmp eq i32 %x, 0
+  br i1 %0, label %bb6, label %bb
+
+bb:
+  %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind
+  ret i32 %1
+
+bb6:
+  ret i32 0
+}
+
+declare i32 @foo5(i32, i32, i32, i32, i32)
+
+%struct.t = type { i32, i32, i32, i32, i32 }
+
+define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
+; 32: t12:
+; 32: jmp {{_?}}foo6
+
+; 64: t12:
+; 64: jmp {{_?}}foo6
+entry:
+  %0 = icmp eq i32 %x, 0
+  br i1 %0, label %bb2, label %bb
+
+bb:
+  %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind
+  ret i32 %1
+
+bb2:
+  ret i32 0
+}
+
+declare i32 @foo6(i32, i32, %struct.t* byval align 4)
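
For illustration, here is the shape of byval forwarding that the new byval path in MatchingStackOffset accepts, modeled on t12 above. This sketch is not part of the commit: the function names and the RUN line are stand-ins (tailcall2.ll's real RUN lines fall outside these hunks). The byval object is forwarded in the same argument slot, so its fixed stack object already sits at the offset the callee expects and the call can be lowered as a sibcall:

; RUN: llc < %s -march=x86 | FileCheck %s

%struct.s = type { i32, i32, i32 }

define i32 @t13(i32 %n, %struct.s* byval align 4 %s) nounwind ssp {
; CHECK: t13:
; CHECK: jmp {{_?}}use_s
entry:
  %0 = icmp eq i32 %n, 0
  br i1 %0, label %bb2, label %bb

bb:
; %s is passed byval at the same stack offset it arrived at, so
; MatchingStackOffset matches the LEA of its fixed frame index (and the
; matching object size) and no byval copy is needed.
  %1 = tail call i32 @use_s(i32 %n, %struct.s* byval align 4 %s) nounwind
  ret i32 %1

bb2:
  ret i32 0
}

declare i32 @use_s(i32, %struct.s* byval align 4)

Had %s been forwarded at a different argument position, the offset comparison at the end of MatchingStackOffset would fail, and the call would be lowered as an ordinary call with an explicit byval copy instead.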