From f48ef0365545b6160836e3f4b4a210d1e21f1881 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Sun, 14 Mar 2010 03:48:46 +0000
Subject: [PATCH] Do not force indirect tailcall through fixed registers: eax,
 r11. Add support to allow loads to be folded to tail call instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98465 91177308-0d34-0410-b5e6-96231b3b80d8
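
An illustrative example (hypothetical IR, not one of the tests touched
below): with -tailcallopt, an indirect sibling call through a loaded
function pointer such as

    define fastcc i32 @dispatch(i32 %x, i32 (i32)** %table) {
    entry:
      ; The callee address is loaded from memory...
      %f = load i32 (i32)** %table
      ; ...and tail-called; the load may now fold into the jump.
      %r = tail call fastcc i32 %f(i32 %x)
      ret i32 %r
    }

used to force the callee address into EAX (32-bit) or R11 (64-bit). With
this patch the target may live in any register of the new GR32_TC/GR64_TC
classes, and the load can be folded into the tail jump itself, so the
epilogue can emit something like "jmpq *(%rsi) # TAILCALL" directly (the
exact register is the allocator's choice).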
---
 lib/Target/X86/X86ISelDAGToDAG.cpp     | 37 +++++++-----
 lib/Target/X86/X86ISelLowering.cpp     | 20 -------
 lib/Target/X86/X86Instr64bit.td        | 81 ++++++++++++++++++-------
 lib/Target/X86/X86InstrInfo.cpp        | 17 ++++++
 lib/Target/X86/X86InstrInfo.td         | 83 ++++++++++++++++++--------
 lib/Target/X86/X86RegisterInfo.cpp     | 25 +++++---
 lib/Target/X86/X86RegisterInfo.td      |  7 +++
 test/CodeGen/X86/tailcall-largecode.ll |  8 +--
 test/CodeGen/X86/tailcallfp2.ll        |  2 +-
 9 files changed, 188 insertions(+), 92 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3fad8ade41b..40588856525 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -349,17 +349,17 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
   return true;
 }
 
-/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
-/// operand and move load below the call's chain operand.
-static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
-                                  SDValue Call, SDValue CallSeqStart) {
+/// MoveBelowOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+                               SDValue Call, SDValue OrigChain) {
   SmallVector<SDValue, 8> Ops;
-  SDValue Chain = CallSeqStart.getOperand(0);
+  SDValue Chain = OrigChain.getOperand(0);
   if (Chain.getNode() == Load.getNode())
     Ops.push_back(Load.getOperand(0));
   else {
     assert(Chain.getOpcode() == ISD::TokenFactor &&
-           "Unexpected CallSeqStart chain operand");
+           "Unexpected chain operand");
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
       if (Chain.getOperand(i).getNode() == Load.getNode())
         Ops.push_back(Load.getOperand(0));
@@ -371,9 +371,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
     Ops.clear();
     Ops.push_back(NewChain);
   }
-  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
-    Ops.push_back(CallSeqStart.getOperand(i));
-  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+    Ops.push_back(OrigChain.getOperand(i));
+  CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                              Load.getOperand(1), Load.getOperand(2));
   Ops.clear();
@@ -386,7 +386,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
 /// isCalleeLoad - Return true if call address is a load and it can be
 /// moved below CALLSEQ_START and the chains leading up to the call.
 /// Return the CALLSEQ_START by reference as a second output.
-static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
     return false;
   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -397,12 +399,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
     return false;
   // Now let's find the callseq_start.
-  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
     if (!Chain.hasOneUse())
       return false;
     Chain = Chain.getOperand(0);
   }
-
+
+  if (!Chain.getNumOperands())
+    return false;
   if (Chain.getOperand(0).getNode() == Callee.getNode())
     return true;
   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@@ -420,7 +424,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
        E = CurDAG->allnodes_end(); I != E; ) {
     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 
-    if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
+    if (OptLevel != CodeGenOpt::None &&
+        (N->getOpcode() == X86ISD::CALL ||
+         N->getOpcode() == X86ISD::TC_RETURN)) {
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
       ///
@@ -440,11 +446,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       ///          \        /
       ///           \      /
       ///            [CALL]
+      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
       SDValue Chain = N->getOperand(0);
       SDValue Load  = N->getOperand(1);
-      if (!isCalleeLoad(Load, Chain))
+      if (!isCalleeLoad(Load, Chain, HasCallSeq))
         continue;
-      MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
+      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
       ++NumLoadMoved;
       continue;
     }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 669c4b1afaf..528bfaf56b5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2133,18 +2133,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                          OpFlags);
   }
 
-  if (isTailCall && !WasGlobalOrExternal) {
-    // Force the address into a (call preserved) caller-saved register since
-    // tailcall must happen after callee-saved registers are poped.
-    // FIXME: Give it a special register class that contains caller-saved
-    // register instead?
-    unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX;
-    Chain = DAG.getCopyToReg(Chain, dl,
-                             DAG.getRegister(TCReg, getPointerTy()),
-                             Callee,InFlag);
-    Callee = DAG.getRegister(TCReg, getPointerTy());
-  }
-
   // Returns a chain & a flag for retval copy to use.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   SmallVector<SDValue, 8> Ops;
@@ -2190,14 +2178,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       if (RVLocs[i].isRegLoc())
         MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
     }
-
-    assert(((Callee.getOpcode() == ISD::Register &&
-             (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
-              cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
-            Callee.getOpcode() == ISD::TargetExternalSymbol ||
-            Callee.getOpcode() == ISD::TargetGlobalAddress) &&
-           "Expecting a global address, external symbol, or scratch register");
-
     return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   }
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index dc5b1120fc9..4b071f12261 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -33,6 +33,15 @@ def i64i8imm : Operand<i64> {
   let ParserMatchClass = ImmSExt8AsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea64mem : Operand<i64> {
   let PrintMethod = "printlea64mem";
   let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
@@ -177,22 +186,31 @@ let isCall = 1 in
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
-                                         variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
-                                         variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops),
-                   "jmp{q}\t{*}$dst # TAILCALL",
-                   []>;
+  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
+  def TCRETURNdi64 : I<0, Pseudo, (outs),
+                       (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
+                                           variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi64 : I<0, Pseudo, (outs),
+                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+
+  def TAILJMPd64 : Ii32<0xE9, RawFrm, (outs),
+                        (ins i64i32imm_pcrel:$dst, variable_ops),
+                        "jmp\t$dst # TAILCALL", []>;
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+  def TAILJMPm64 : I<0xff, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
 
 // Branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -340,6 +358,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)]>;
 
+/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let neverHasSideEffects = 1 in
+def MOV64rr_TC : I<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV64rm_TC : I<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}",
+                   []>;
+
+let mayStore = 1 in
+def MOV64mr_TC : I<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}",
+                   []>;
+
 def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
                     "mov{q}\t{$src, %rax|%rax, $src}", []>;
 def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -1885,14 +1919,21 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
           (WINCALL64pcrel32 texternalsym:$dst)>,
           Requires<[IsWin64]>;
 
 // tailcall stuff
-def : Pat<(X86tcret GR64:$dst, imm:$off),
-          (TCRETURNri64 GR64:$dst, imm:$off)>;
+def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
+          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi64 addr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 // Comparisons.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4fd91bbc2fd..139a905ec38 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -266,6 +266,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
+    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -301,6 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::SETPr,       X86::SETPm, 0, 0 },
     { X86::SETSr,       X86::SETSm, 0, 0 },
     { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
+    { X86::TAILJMPr64,  X86::TAILJMPm64, 1, 0 },
     { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
     { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
     { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
@@ -376,6 +378,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
+    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -675,6 +678,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
   case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr:
+  case X86::MOV32rr_TC:
+  case X86::MOV64rr_TC:
   // FP Stack register class copies
   case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
@@ -1901,6 +1906,10 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     Opc = X86::MOV16rr;
   } else if (CommonRC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8rr;
+  } else if (CommonRC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64rr_TC;
+  } else if (CommonRC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32rr_TC;
   } else if (CommonRC == &X86::RFP32RegClass) {
     Opc = X86::MOV_Fp3232;
   } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
@@ -2038,6 +2047,10 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
     Opc = X86::MOV16mr;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64mr_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32mr_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::ST_FpP80m;   // pops
   } else if (RC == &X86::RFP64RegClass) {
@@ -2131,6 +2144,10 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
     Opc = X86::MOV16rm;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64rm_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32rm_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::LD_Fp80m;
   } else if (RC == &X86::RFP64RegClass) {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 071c5aa31ac..e6a240d6009 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -234,6 +234,15 @@ def i8mem_NOREX : Operand<i8> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i32mem_TC : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea32mem : Operand<i32> {
   let PrintMethod = "printlea32mem";
   let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
@@ -696,30 +705,33 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
 
 // Tail call stuff.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNdi : I<0, Pseudo, (outs),
-                   (ins i32imm:$dst, i32imm:$offset, variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNri : I<0, Pseudo, (outs),
-                   (ins GR32:$dst, i32imm:$offset, variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-// FIXME: The should be pseudo instructions that are lowered when going to
-// mcinst.
-let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+  def TCRETURNdi : I<0, Pseudo, (outs),
+                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri : I<0, Pseudo, (outs),
+                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi : I<0, Pseudo, (outs),
+                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+
+  // FIXME: These should be pseudo instructions that are lowered when going to
+  // mcinst.
+  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),
+                      (ins i32imm_pcrel:$dst, variable_ops),
                  "jmp\t$dst # TAILCALL",
                  []>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops),
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
                    "jmp{l}\t{*}$dst # TAILCALL", []>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops),
-                   "jmp\t{*}$dst # TAILCALL", []>;
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+                   "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
 
 //===----------------------------------------------------------------------===//
 //  Miscellaneous Instructions...
@@ -1032,6 +1044,22 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
 
+/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let neverHasSideEffects = 1 in
+def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}",
+                   []>;
+
+let mayStore = 1 in
+def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}",
+                   []>;
+
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
 // encoded when a REX prefix is present.
@@ -4294,14 +4322,21 @@ def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
 
 // Calls
 // tailcall stuff
-def : Pat<(X86tcret GR32:$dst, imm:$off),
-          (TCRETURNri GR32:$dst, imm:$off)>;
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+          (TCRETURNri GR32_TC:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi addr:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 // Normal calls, with various flavors of addresses.
 def : Pat<(X86call (i32 tglobaladdr:$dst)),
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index d3a5b15d8fd..f46c631e3cb 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1138,13 +1138,12 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
   case X86::RETI:
   case X86::TCRETURNdi:
   case X86::TCRETURNri:
-  case X86::TCRETURNri64:
+  case X86::TCRETURNmi:
   case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
   case X86::EH_RETURN:
   case X86::EH_RETURN64:
-  case X86::TAILJMPd:
-  case X86::TAILJMPr:
-  case X86::TAILJMPm:
     break;  // These are ok
   }
@@ -1229,11 +1228,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
             TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             StackPtr).addReg(DestAddr.getReg());
   } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
-             RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+             RetOpcode == X86::TCRETURNmi ||
+             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+             RetOpcode == X86::TCRETURNmi64) {
+    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
     // Tail call return: adjust the stack pointer and jump to callee.
     MBBI = prior(MBB.end());
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    MachineOperand &StackAdjust = MBBI->getOperand(1);
+    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
     assert(StackAdjust.isImm() && "Expecting immediate value.");
 
     // Adjust stack pointer.
@@ -1253,10 +1255,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
     }
 
     // Jump to label or value in register.
-    if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                          JumpTarget.getTargetFlags());
+    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+                                       ? X86::TAILJMPm : X86::TAILJMPm64));
+      for (unsigned i = 0; i != 5; ++i)
+        MIB.addOperand(MBBI->getOperand(i));
     } else if (RetOpcode == X86::TCRETURNri64) {
       BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
     } else {
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index ed2ce6c353d..76b8f7a953c 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -535,6 +535,13 @@ def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
 def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
   let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
 }
+def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+  let SubRegClassList = [GR8, GR8, GR16];
+}
+def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
+                                               R8, R9, R11]> {
+  let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+}
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index 8ddc4054ca8..c7070f2abd2 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -20,7 +20,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ; CHECK: subq $8, %rsp
 ; Put the call target into R11, which won't be clobbered while restoring
 ; callee-saved registers and won't be used for passing arguments.
-; CHECK: movq %rdi, %r11
+; CHECK: movq %rdi, %rax
 ; Pass the stack argument.
 ; CHECK: movl $7, 16(%rsp)
 ; Pass the register arguments, in the right registers.
 ; CHECK: movl $1, %edi
@@ -33,7 +33,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ; Adjust the stack to "return".
 ; CHECK: addq $8, %rsp
 ; And tail-call to the target.
-; CHECK: jmpq *%r11 # TAILCALL
+; CHECK: jmpq *%rax # TAILCALL
   %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4,
                                       i32 5, i32 6, i32 7)
   ret i32 %res
@@ -60,11 +60,11 @@ define fastcc i32 @direct_manyargs() {
 ; the jmp instruction. Put it into R11, which won't be clobbered
 ; while restoring callee-saved registers and won't be used for passing
 ; arguments.
-; CHECK: movabsq $manyargs_callee, %r11
+; CHECK: movabsq $manyargs_callee, %rax
 ; Adjust the stack to "return".
 ; CHECK: addq $8, %rsp
 ; And tail-call to the target.
-; CHECK: jmpq *%r11 # TAILCALL
+; CHECK: jmpq *%rax # TAILCALL
   %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
                                                i32 5, i32 6, i32 7)
   ret i32 %res
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 3841f518976..4ec127f81ac 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
 
 declare i32 @putchar(i32)
-- 
2.34.1
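
Not part of the patch above: a sketch of an additional regression test one
could write for the new load-folding path (hypothetical file and function
names; the CHECK pattern is an expectation under this patch, not verified
output):

    ; RUN: llc < %s -march=x86-64 -tailcallopt | FileCheck %s
    ; The callee address is loaded from memory and should fold into the
    ; tail jump itself, rather than being copied into a fixed register
    ; (r11) first.
    ; CHECK: jmpq *({{%r[a-z0-9]+}})
    define fastcc i32 @indirect_fold(i32 %x, i32 (i32)** %table) {
    entry:
      %f = load i32 (i32)** %table
      %r = tail call fastcc i32 %f(i32 %x)
      ret i32 %r
    }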