From 456ca048af35163b9f52187e92a23ee0a9f059e8 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Sat, 20 Apr 2013 05:14:40 +0000 Subject: [PATCH] Add CodeGen support for functions that always return arguments via a new parameter attribute 'returned', which is taken advantage of in target-independent tail call opportunity detection and in ARM call lowering (when placed on an integral first parameter). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179925 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 12 ++- include/llvm/IR/Argument.h | 4 + include/llvm/IR/Attributes.h | 1 + include/llvm/Target/TargetCallingConv.h | 37 ++++---- include/llvm/Target/TargetLowering.h | 16 ++-- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 3 + lib/AsmParser/LLToken.h | 1 + lib/CodeGen/Analysis.cpp | 26 ++++++ .../SelectionDAG/SelectionDAGBuilder.cpp | 18 ++-- lib/IR/Attributes.cpp | 3 + lib/IR/Function.cpp | 7 ++ lib/IR/Verifier.cpp | 51 +++++++++-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 6 ++ lib/Target/ARM/ARMBaseRegisterInfo.h | 1 + lib/Target/ARM/ARMCallingConv.td | 11 +++ lib/Target/ARM/ARMISelLowering.cpp | 33 ++++++- lib/Target/ARM/ARMISelLowering.h | 3 +- test/CodeGen/ARM/this-return.ll | 91 +++++++++++++++++++ test/CodeGen/X86/this-return-64.ll | 89 ++++++++++++++++++ 20 files changed, 371 insertions(+), 43 deletions(-) create mode 100644 test/CodeGen/ARM/this-return.ll create mode 100644 test/CodeGen/X86/this-return-64.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 905053fef0b..7dafbb98779 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -719,7 +719,17 @@ Currently, only the following parameter attributes are defined: ``nest`` This indicates that the pointer parameter can be excised using the :ref:`trampoline intrinsics `. This is not a valid - attribute for return values. + attribute for return values and can only be applied to one parameter. 
+ +``returned`` + This indicates that the function always returns the value + of the parameter as its return value. This is an optimization hint to + the code generator when generating the caller, allowing tail call + optimization and omission of register saves and restores in some cases; + it is not checked or enforced when generating the callee. The parameter + and the function return type must be valid operands for the + :ref:`bitcast instruction <i_bitcast>`. This is not a valid attribute for + return values and can only be applied to one parameter. .. _gc: diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h index ef4e4fc7aa6..40d61ff6854 100644 --- a/include/llvm/IR/Argument.h +++ b/include/llvm/IR/Argument.h @@ -78,6 +78,10 @@ public: /// containing function. bool hasStructRetAttr() const; + /// \brief Return true if this argument has the returned attribute on it in + /// its containing function. + bool hasReturnedAttr() const; + /// \brief Add a Attribute to an argument. 
void addAttr(AttributeSet AS); diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index f93f28b239a..c801436910d 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -87,6 +87,7 @@ public: OptimizeForSize, ///< opt_size ReadNone, ///< Function does not access memory ReadOnly, ///< Function only reads from memory + Returned, ///< Return value is always equal to this argument ReturnsTwice, ///< Function can return twice SExt, ///< Sign extended before/after call StackAlignment, ///< Alignment of stack for function (3 bits) diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index 2160e371bda..56ebfa41040 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -36,10 +36,12 @@ namespace ISD { static const uint64_t ByValOffs = 4; static const uint64_t Nest = 1ULL<<5; ///< Nested fn static chain static const uint64_t NestOffs = 5; - static const uint64_t ByValAlign = 0xFULL << 6; ///< Struct alignment - static const uint64_t ByValAlignOffs = 6; - static const uint64_t Split = 1ULL << 10; - static const uint64_t SplitOffs = 10; + static const uint64_t Returned = 1ULL<<6; + static const uint64_t ReturnedOffs = 6; + static const uint64_t ByValAlign = 0xFULL<<7; ///< Struct alignment + static const uint64_t ByValAlignOffs = 7; + static const uint64_t Split = 1ULL<<11; + static const uint64_t SplitOffs = 11; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; static const uint64_t ByValSize = 0xffffffffULL << 32; ///< Struct size @@ -51,23 +53,26 @@ namespace ISD { public: ArgFlagsTy() : Flags(0) { } - bool isZExt() const { return Flags & ZExt; } - void setZExt() { Flags |= One << ZExtOffs; } + bool isZExt() const { return Flags & ZExt; } + void setZExt() { Flags |= One << ZExtOffs; } - bool isSExt() const { return Flags & SExt; } - void setSExt() { Flags |= One << SExtOffs; } + bool isSExt() const { 
return Flags & SExt; } + void setSExt() { Flags |= One << SExtOffs; } - bool isInReg() const { return Flags & InReg; } - void setInReg() { Flags |= One << InRegOffs; } + bool isInReg() const { return Flags & InReg; } + void setInReg() { Flags |= One << InRegOffs; } - bool isSRet() const { return Flags & SRet; } - void setSRet() { Flags |= One << SRetOffs; } + bool isSRet() const { return Flags & SRet; } + void setSRet() { Flags |= One << SRetOffs; } - bool isByVal() const { return Flags & ByVal; } - void setByVal() { Flags |= One << ByValOffs; } + bool isByVal() const { return Flags & ByVal; } + void setByVal() { Flags |= One << ByValOffs; } - bool isNest() const { return Flags & Nest; } - void setNest() { Flags |= One << NestOffs; } + bool isNest() const { return Flags & Nest; } + void setNest() { Flags |= One << NestOffs; } + + bool isReturned() const { return Flags & Returned; } + void setReturned() { Flags |= One << ReturnedOffs; } unsigned getByValAlign() const { return (unsigned) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index e169bcf9e42..1e7ccd8f8e1 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1910,16 +1910,18 @@ public: struct ArgListEntry { SDValue Node; Type* Ty; - bool isSExt : 1; - bool isZExt : 1; - bool isInReg : 1; - bool isSRet : 1; - bool isNest : 1; - bool isByVal : 1; + bool isSExt : 1; + bool isZExt : 1; + bool isInReg : 1; + bool isSRet : 1; + bool isNest : 1; + bool isByVal : 1; + bool isReturned : 1; uint16_t Alignment; ArgListEntry() : isSExt(false), isZExt(false), isInReg(false), - isSRet(false), isNest(false), isByVal(false), Alignment(0) { } + isSRet(false), isNest(false), isByVal(false), isReturned(false), + Alignment(0) { } }; typedef std::vector ArgListTy; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index f46383be7e4..e7a9f2ad1e3 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -582,6 
+582,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(returned); KEYWORD(returns_twice); KEYWORD(signext); KEYWORD(sret); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 8fa626dcc9d..92756bad82f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -944,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: + case lltok::kw_returned: case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), @@ -1156,6 +1157,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; @@ -1199,6 +1201,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_byval: case lltok::kw_nest: case lltok::kw_nocapture: + case lltok::kw_returned: case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index cd25ba30008..3bf54fa1cc6 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -114,6 +114,7 @@ namespace lltok { kw_optsize, kw_readnone, kw_readonly, + kw_returned, kw_returns_twice, kw_signext, kw_ssp, diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 9723f8080c8..4731af5089a 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -261,6 +261,32 @@ static bool sameNoopInput(const Value *V1, const Value *V2, TLI.getPointerTy().getSizeInBits() == 
cast(I->getType())->getBitWidth()) NoopInput = Op; + } else if (isa(I)) { + // Look through call + for (User::const_op_iterator i = I->op_begin(), + // Skip Callee + e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } else if (isa(I)) { + // Look through invoke + for (User::const_op_iterator i = I->op_begin(), + // Skip BB, BB, Callee + e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ce40cd6a0c9..c1c8be4387a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5232,6 +5232,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; + Entry.isReturned = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); @@ -5249,13 +5250,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = 
CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); + Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -6430,6 +6432,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); + if (Args[i].isReturned) + Flags.setReturned(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 44fa78e4af6..80968dbd1f7 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -195,6 +195,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "readnone"; if (hasAttribute(Attribute::ReadOnly)) return "readonly"; + if (hasAttribute(Attribute::Returned)) + return "returned"; if (hasAttribute(Attribute::ReturnsTwice)) return "returns_twice"; if (hasAttribute(Attribute::SExt)) @@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::SanitizeThread: return 1ULL << 36; case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; + case Attribute::Returned: return 1ULL << 39; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 1e72b90a13c..7f7efabf765 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -124,6 +124,13 @@ bool Argument::hasStructRetAttr() const { hasAttribute(1, Attribute::StructRet); } +/// hasReturnedAttr - Return true if this argument has the returned attribute on +/// it in its containing function. +bool Argument::hasReturnedAttr() const { + return getParent()->getAttributes(). 
+ hasAttribute(getArgNo()+1, Attribute::Returned); +} + /// addAttr - Add attributes to an argument. void Argument::addAttr(AttributeSet AS) { assert(AS.getNumSlots() <= 1 && diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index ec01edfb9aa..69cb5dc3817 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -694,8 +694,9 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && !Attrs.hasAttribute(Idx, Attribute::Nest) && !Attrs.hasAttribute(Idx, Attribute::StructRet) && - !Attrs.hasAttribute(Idx, Attribute::NoCapture), - "Attribute 'byval', 'nest', 'sret', and 'nocapture' " + !Attrs.hasAttribute(Idx, Attribute::NoCapture) && + !Attrs.hasAttribute(Idx, Attribute::Returned), + "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' " "do not apply to return values!", V); // Check for mutually incompatible attributes. @@ -750,6 +751,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, return; bool SawNest = false; + bool SawReturned = false; for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { unsigned Idx = Attrs.getSlotIndex(i); @@ -764,11 +766,22 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V); - if (Attrs.hasAttribute(i, Attribute::Nest)) { + if (Idx == 0) + continue; + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + V); + Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible " + "argument and return types for 'returned' attribute", V); + SawReturned = true; + } + if (Attrs.hasAttribute(Idx, Attribute::StructRet)) Assert1(Idx == 1, "Attribute sret is not on first parameter!", V); } @@ -1348,15 +1361,41 @@ void 
Verifier::VerifyCallSite(CallSite CS) { // Verify call attributes. VerifyFunctionAttrs(FTy, Attrs, I); - if (FTy->isVarArg()) + if (FTy->isVarArg()) { + // FIXME? is 'nest' even legal here? + bool SawNest = false; + bool SawReturned = false; + + for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) { + if (Attrs.hasAttribute(Idx, Attribute::Nest)) + SawNest = true; + if (Attrs.hasAttribute(Idx, Attribute::Returned)) + SawReturned = true; + } + // Check attributes on the varargs part. for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(), - false, I); + Type *Ty = CS.getArgument(Idx-1)->getType(); + VerifyParameterAttrs(Attrs, Idx, Ty, false, I); + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { + Assert1(!SawNest, "More than one parameter has attribute nest!", I); + SawNest = true; + } + + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + I); + Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), + "Incompatible argument and return types for 'returned' " + "attribute", I); + SawReturned = true; + } Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet), "Attribute 'sret' cannot be used for vararg call arguments!", I); } + } // Verify that there's no metadata unless it's a direct call to an intrinsic. if (CS.getCalledFunction() == 0 || diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b6b27f849a2..b0d34a76b01 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -74,6 +74,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } +const uint32_t* +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? 
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +} + const uint32_t* ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 725033b7e57..0679919152c 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,6 +96,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b378b966268..9966f6c3f6d 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -195,10 +195,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' +// and the pointer return value are both passed in R0 in these cases, this can +// be partially modelled by treating R0 as a callee-saved register +// Only the resulting RegMask is used; the SaveList is ignored +def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, + R5, R4, (sequence "D%u", 15, 8), + R0)>; + // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. 
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; +def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + (sub CSR_AAPCS_ThisReturn, R9))>; + // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around // add is a workaround for not being able to compile empty list: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 7e7d0248b99..ffb880a6936 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1238,7 +1238,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { + SmallVectorImpl &InVals, + bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector RVLocs; @@ -1252,6 +1253,14 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; + // Pass 'this' value directly from the argument to return value, to avoid + // reg unit interference + if (i == 0 && isThisReturn) { + assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32); + InVals.push_back(ThisVal); + continue; + } + SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. @@ -1364,7 +1373,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); - bool IsSibCall = false; + bool IsThisReturn = false; + bool IsSibCall = false; // Disable tail calls if they're not supported. 
if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; @@ -1460,6 +1470,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { + if (realArgIdx == 0 && Flags.isReturned() && VA.getLocVT() == MVT::i32) { + assert(!Ins.empty() && Ins[0].VT == Outs[0].VT && + "unexpected use of 'returned'"); + IsThisReturn = true; + } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -1680,8 +1695,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. + const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const ARMBaseRegisterInfo *ARI = static_cast(TRI); + if (IsThisReturn) + // For 'this' returns, use the R0-preserving mask + Mask = ARI->getThisReturnPreservedMask(CallConv); + else + Mask = ARI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1703,8 +1725,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, - dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, IsThisReturn, + IsThisReturn ? 
OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9ee17f0781b..015416c7026 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -464,7 +464,8 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals, + bool isThisReturn, SDValue ThisVal) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll new file mode 100644 index 00000000000..0266153b941 --- /dev/null +++ b/test/CodeGen/ARM/this-return.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D + +%struct.A = type { i8 } +%struct.B = type { i32 } +%struct.C = type { %struct.B } +%struct.D = type { %struct.B } + +declare %struct.A* @A_ctor_base(%struct.A* returned) +declare %struct.B* @B_ctor_base(%struct.B* returned, i32) +declare %struct.B* @B_ctor_complete(%struct.B* returned, i32) + +declare %struct.A* @A_ctor_base_nothisret(%struct.A*) +declare %struct.B* @B_ctor_base_nothisret(%struct.B*, i32) +declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32) + +define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) { +entry: +; CHECKELF: C_ctor_base: +; CHECKELF-NOT: mov {{r[0-9]+}}, r0 +; CHECKELF: bl A_ctor_base +; CHECKELF-NOT: mov r0, {{r[0-9]+}} +; CHECKELF: b B_ctor_base +; CHECKT2D: C_ctor_base: +; CHECKT2D-NOT: mov {{r[0-9]+}}, r0 +; CHECKT2D: blx _A_ctor_base +; CHECKT2D-NOT: mov r0, {{r[0-9]+}} +; CHECKT2D: b.w _B_ctor_base + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) + %1 = getelementptr inbounds 
%struct.C* %this, i32 0, i32 0 + %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_base_nothisret: +; CHECKELF: mov [[SAVETHIS:r[0-9]+]], r0 +; CHECKELF: bl A_ctor_base_nothisret +; CHECKELF: mov r0, [[SAVETHIS]] +; CHECKELF-NOT: b B_ctor_base_nothisret +; CHECKT2D: C_ctor_base_nothisret: +; CHECKT2D: mov [[SAVETHIS:r[0-9]+]], r0 +; CHECKT2D: blx _A_ctor_base_nothisret +; CHECKT2D: mov r0, [[SAVETHIS]] +; CHECKT2D-NOT: b.w _B_ctor_base_nothisret + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0) + %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0 + %call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_complete: +; CHECKELF: b C_ctor_base +; CHECKT2D: C_ctor_complete: +; CHECKT2D: b.w _C_ctor_base + %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_complete_nothisret: +; CHECKELF-NOT: b C_ctor_base_nothisret +; CHECKT2D: C_ctor_complete_nothisret: +; CHECKT2D-NOT: b.w _C_ctor_base_nothisret + %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) + ret %struct.C* %this +} + +define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { +entry: +; CHECKELF: D_ctor_base: +; CHECKELF-NOT: mov {{r[0-9]+}}, r0 +; CHECKELF: bl B_ctor_complete +; CHECKELF-NOT: mov r0, {{r[0-9]+}} +; CHECKELF: b B_ctor_complete +; CHECKT2D: D_ctor_base: +; CHECKT2D-NOT: mov {{r[0-9]+}}, r0 +; CHECKT2D: blx _B_ctor_complete +; CHECKT2D-NOT: mov r0, {{r[0-9]+}} +; CHECKT2D: b.w _B_ctor_complete + %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0 + %call = tail call 
%struct.B* @B_ctor_complete(%struct.B* %b, i32 %x) + %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x) + ret %struct.D* %this +} diff --git a/test/CodeGen/X86/this-return-64.ll b/test/CodeGen/X86/this-return-64.ll new file mode 100644 index 00000000000..2b26a89e3c8 --- /dev/null +++ b/test/CodeGen/X86/this-return-64.ll @@ -0,0 +1,89 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s + +%struct.A = type { i8 } +%struct.B = type { i32 } +%struct.C = type { %struct.B } +%struct.D = type { %struct.B } +%struct.E = type { %struct.B } + +declare %struct.A* @A_ctor(%struct.A* returned) +declare %struct.B* @B_ctor(%struct.B* returned, i32) + +declare %struct.A* @A_ctor_nothisret(%struct.A*) +declare %struct.B* @B_ctor_nothisret(%struct.B*, i32) + +define %struct.C* @C_ctor(%struct.C* %this, i32 %y) { +entry: +; CHECK: C_ctor: +; CHECK: jmp B_ctor # TAILCALL + %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0 + %call = tail call %struct.B* @B_ctor(%struct.B* %0, i32 %y) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_nothisret(%struct.C* %this, i32 %y) { +entry: +; CHECK: C_ctor_nothisret: +; CHECK-NOT: jmp B_ctor_nothisret + %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0 + %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %0, i32 %y) + ret %struct.C* %this +} + +define %struct.D* @D_ctor(%struct.D* %this, i32 %y) { +entry: +; CHECK: D_ctor: +; CHECK: movq %rcx, [[SAVETHIS:%r[0-9a-z]+]] +; CHECK: callq A_ctor +; CHECK: movq [[SAVETHIS]], %rcx +; CHECK: jmp B_ctor # TAILCALL + %0 = bitcast %struct.D* %this to %struct.A* + %call = tail call %struct.A* @A_ctor(%struct.A* %0) + %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0 + %call2 = tail call %struct.B* @B_ctor(%struct.B* %1, i32 %y) +; (this next line would never be generated by Clang, actually) + %2 = bitcast %struct.A* %call to %struct.D* + ret %struct.D* %2 +} + +define %struct.D* @D_ctor_nothisret(%struct.D* %this, i32 %y) { +entry: +; CHECK: 
D_ctor_nothisret: +; CHECK: movq %rcx, [[SAVETHIS:%r[0-9a-z]+]] +; CHECK: callq A_ctor_nothisret +; CHECK: movq [[SAVETHIS]], %rcx +; CHECK-NOT: jmp B_ctor_nothisret + %0 = bitcast %struct.D* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_nothisret(%struct.A* %0) + %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0 + %call2 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %1, i32 %y) +; (this next line would never be generated by Clang, actually) + %2 = bitcast %struct.A* %call to %struct.D* + ret %struct.D* %2 +} + +define %struct.E* @E_ctor(%struct.E* %this, i32 %x) { +entry: +; CHECK: E_ctor: +; CHECK: movq %rcx, [[SAVETHIS:%r[0-9a-z]+]] +; CHECK: callq B_ctor +; CHECK: movq [[SAVETHIS]], %rcx +; CHECK: jmp B_ctor # TAILCALL + %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0 + %call = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x) + %call4 = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x) + ret %struct.E* %this +} + +define %struct.E* @E_ctor_nothisret(%struct.E* %this, i32 %x) { +entry: +; CHECK: E_ctor_nothisret: +; CHECK: movq %rcx, [[SAVETHIS:%r[0-9a-z]+]] +; CHECK: callq B_ctor_nothisret +; CHECK: movq [[SAVETHIS]], %rcx +; CHECK-NOT: jmp B_ctor_nothisret + %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0 + %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x) + %call4 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x) + ret %struct.E* %this +} -- 2.34.1