An argument allocation may be used by a call at most once because
the call may deallocate it. The ``inalloca`` attribute cannot be
used in conjunction with other attributes that affect argument
- storage, like ``inreg``, ``nest``, ``sret``, or ``byval``.
+ storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
+ ``inalloca`` attribute also disables LLVM's implicit lowering of
+ large aggregate return values, which means that frontend authors
+ must lower them with ``sret`` pointers.
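+ For example, a frontend might allocate the argument block with an
+ ``inalloca`` alloca, initialize it in place, and then pass it to the
+ call, where ``%frame`` is whatever struct of argument memory the
+ frontend chose:
+
+ .. code-block:: llvm
+
+     %args = alloca %frame, inalloca
+     %b = getelementptr %frame* %args, i32 0, i32 1
+     store i32 42, i32* %b
+     call void @f(%frame* inalloca %args)
+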
When the call site is reached, the argument allocation must have
been the most recent stack allocation that is still live, or the
paramHasAttr(ArgNo + 1, Attribute::InAlloca);
}
- /// @brief Determine if there are any inalloca arguments.
+ /// @brief Determine if there is an inalloca argument. Only the last
+ /// argument can have the inalloca attribute.
bool hasInAllocaArgument() const {
- return getAttributes().hasAttrSomewhere(Attribute::InAlloca);
+ return paramHasAttr(arg_size(), Attribute::InAlloca);
}
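Since attribute indices are 1-based (note the ArgNo + 1 above) and
inalloca can only appear on the last parameter, checking index
arg_size() covers the whole call site. A minimal usage sketch,
mirroring the fast-isel bail-out later in this patch:

    // Conservatively give up on lowering this call: the callee may
    // deallocate the argument memory, which this code doesn't model.
    if (CS.hasInAllocaArgument())
      return false;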
bool doesNotAccessMemory(unsigned ArgNo) const {
static const uint64_t ByValAlignOffs = 7;
static const uint64_t Split = 1ULL<<11;
static const uint64_t SplitOffs = 11;
+ static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca
+ static const uint64_t InAllocaOffs = 12;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size
bool isByVal() const { return Flags & ByVal; }
void setByVal() { Flags |= One << ByValOffs; }
+ bool isInAlloca() const { return Flags & InAlloca; }
+ void setInAlloca() { Flags |= One << InAllocaOffs; }
+
bool isNest() const { return Flags & Nest; }
void setNest() { Flags |= One << NestOffs; }
return getValueType(Ty, AllowUnknown).getSimpleVT();
}
- /// Return the desired alignment for ByVal aggregate function arguments in the
- /// caller parameter area. This is the actual alignment, not its logarithm.
+ /// Return the desired alignment for ByVal or InAlloca aggregate function
+ /// arguments in the caller parameter area. This is the actual alignment, not
+ /// its logarithm.
virtual unsigned getByValTypeAlignment(Type *Ty) const;
/// Return the type of registers that this ValueType will eventually require.
bool isSRet : 1;
bool isNest : 1;
bool isByVal : 1;
+ bool isInAlloca : 1;
bool isReturned : 1;
uint16_t Alignment;
ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
- isSRet(false), isNest(false), isByVal(false), isReturned(false),
- Alignment(0) { }
+ isSRet(false), isNest(false), isByVal(false), isInAlloca(false),
+ isReturned(false), Alignment(0) { }
void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx);
};
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
+ assert(!CS.hasInAllocaArgument() &&
+ "sret demotion is incompatible with inalloca");
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
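This assert enforces the LangRef rule added above: demoting the return
value would introduce a hidden sret pointer argument, but an inalloca
argument must be the only memory argument (see the matching assert in
the X86 lowering below). A hypothetical call that would trip it, where
%big is an aggregate the target cannot return in registers:

    ; Hypothetical IR: the frontend must pre-lower this return with an
    ; explicit sret pointer instead.
    %r = call %big @f(%frame* inalloca %args)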
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
- if (Args[i].isByVal) {
+ if (Args[i].isByVal)
Flags.setByVal();
+ if (Args[i].isInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
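Reusing the byval size is what keeps callee-cleanup conventions honest:
the CC lowering sees an ordinary fixed-size stack slot and derives the
pop amount from it. For the stdcall test below, for instance:

    ; %Foo = type { i32, i32 }, so getTypeAllocSize(%Foo) == 8 and the
    ; lowering knows the callee pops 8 bytes on return.
    call x86_stdcallcc void @f(%Foo* inalloca %b)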
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
Flags.setByVal();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
Alignment = CS->getParamAlignment(AttrIdx);
}
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
Type *Ty = AI->getType();
- // 'Dereference' type in case of byval parameter attribute
- if (AI->hasByValAttr())
+ // 'Dereference' type in case of byval or inalloca parameter attribute.
+ if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
// Size should be aligned to DWORD boundary
ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
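An inalloca parameter thus contributes the size of its pointee, rounded
up to a DWORD, to the callee's argument byte count, which also feeds
the stdcall name decoration. Worked for the stdcall test below:

    ArgWords += ((getTypeAllocSize(%Foo) + 3)/4)*4
              = ((8 + 3)/4)*4
              = 8                      ; hence the _f@8 decoration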
if (isVarArg && isWin64)
return false;
+ // Fast-isel doesn't know about inalloca yet.
+ if (CS.hasInAllocaArgument())
+ return false;
+
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
TM.Options.GuaranteedTailCallOpt))
X86Info->setTCReturnAddrDelta(FPDiff);
}
+ unsigned NumBytesToPush = NumBytes;
+ unsigned NumBytesToPop = NumBytes;
+
+ // If we have an inalloca argument, all stack space has already been allocated
+ // for us and is right at the top of the stack. We don't support multiple
+ // arguments passed in memory when using inalloca.
+ if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
+ NumBytesToPush = 0;
+ assert(ArgLocs.back().getLocMemOffset() == 0 &&
+ "an inalloca argument must be the only memory argument");
+ }
+
if (!IsSibcall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
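With an inalloca argument, the resulting call sequence looks roughly
like the following (assembled from the stdcall test's CHECK lines; the
register names are whatever the allocator picks):

    movl  $8, %eax          ; size of the argument block
    calll __chkstk
    movl  %[[REG]], %esp    ; the block sits right at the top of the stack
    movl  $13, (%[[REG]])   ; arguments are written in place...
    movl  $42, 4(%[[REG]])  ; ...nothing is pushed at the call
    calll _f@8              ; NumBytesToPush == 0; the callee pops 8 bytes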
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // Skip inalloca arguments; they have already been written.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isInAlloca())
+ continue;
+
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytesToPop, true),
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop,
true),
InFlag, dl);
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+%frame = type { %Foo, i32, %Foo }
+
+declare void @f(%frame* inalloca %a)
+
+declare void @Foo_ctor(%Foo* %this)
+
+define void @g() {
+entry:
+ %args = alloca %frame, inalloca
+ %c = getelementptr %frame* %args, i32 0, i32 2
+; CHECK: movl $20, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %esp,
+ call void @Foo_ctor(%Foo* %c)
+; CHECK: leal 12(%{{.*}}),
+; CHECK: subl $4, %esp
+; CHECK: calll _Foo_ctor
+; CHECK: addl $4, %esp
+ %b = getelementptr %frame* %args, i32 0, i32 1
+ store i32 42, i32* %b
+; CHECK: movl $42,
+ %a = getelementptr %frame* %args, i32 0, i32 0
+ call void @Foo_ctor(%Foo* %a)
+; CHECK: subl $4, %esp
+; CHECK: calll _Foo_ctor
+; CHECK: addl $4, %esp
+ call void @f(%frame* inalloca %args)
+; CHECK: calll _f
+ ret void
+}
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Iter = type { i32, i32, i32 }
+
+%frame.reverse = type { %Iter, %Iter }
+
+declare void @llvm.stackrestore(i8*)
+declare i8* @llvm.stacksave()
+declare void @begin(%Iter* sret)
+declare void @plus(%Iter* sret, %Iter*, i32)
+declare void @reverse(%frame.reverse* inalloca align 4)
+
+define i32 @main() {
+ %temp.lvalue = alloca %Iter
+ br label %blah
+
+blah:
+ %inalloca.save = call i8* @llvm.stacksave()
+ %rev_args = alloca %frame.reverse, inalloca, align 4
+ %beg = getelementptr %frame.reverse* %rev_args, i32 0, i32 0
+ %end = getelementptr %frame.reverse* %rev_args, i32 0, i32 1
+
+; CHECK: calll __chkstk
+; CHECK: movl %[[beg:[^,]*]], %esp
+; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
+
+ call void @begin(%Iter* sret %temp.lvalue)
+; CHECK: calll _begin
+
+ invoke void @plus(%Iter* sret %end, %Iter* %temp.lvalue, i32 4)
+ to label %invoke.cont unwind label %lpad
+
+; Uses %end as the sret param.
+; CHECK: movl %[[end]], (%esp)
+; CHECK: calll _plus
+
+invoke.cont:
+ call void @begin(%Iter* sret %beg)
+
+; CHECK: movl %[[beg]],
+; CHECK: calll _begin
+
+ invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)
+ to label %invoke.cont5 unwind label %lpad
+
+invoke.cont5: ; preds = %invoke.cont
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret i32 0
+
+lpad:                                             ; preds = %invoke.cont, %blah
+ %lp = landingpad { i8*, i32 } personality i8* null
+ cleanup
+ unreachable
+}
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+declare x86_stdcallcc void @f(%Foo* inalloca %a)
+declare x86_stdcallcc void @i(i32 %a)
+
+define void @g() {
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call x86_stdcallcc void @f(%Foo* inalloca %b)
+; CHECK: calll _f@8
+; CHECK-NOT: %esp
+; CHECK: subl $4, %esp
+; CHECK: calll _i@4
+ call x86_stdcallcc void @i(i32 0)
+ ret void
+}
--- /dev/null
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+declare void @f(%Foo* inalloca %b)
+
+define void @a() {
+; CHECK-LABEL: _a:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call void @f(%Foo* inalloca %b)
+; CHECK: calll _f
+ ret void
+}
+
+declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b)
+
+define void @b() {
+; CHECK-LABEL: _b:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
+; CHECK: movl $1, %eax
+; CHECK: calll _inreg_with_inalloca
+ ret void
+}
+
+declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b)
+
+define void @c() {
+; CHECK-LABEL: _c:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _thiscall_with_inalloca
+ ret void
+}