From e35992951792e6951612301becf6b0ac2e77f30d Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 30 Jan 2015 21:03:31 +0000 Subject: [PATCH] Win64: Put a REX_W prefix on all TAILJMP* instructions MSDN's x64 software conventions page says that this is one of the fixed list of legal epilogues: https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx Presumably this is how the unwinder distinguishes epilogue jumps from in-function control flow. Also normalize the way we place "## TAILCALL" comments on such jumps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 20 +++++++++------- lib/Target/X86/X86ISelLowering.cpp | 3 +++ lib/Target/X86/X86InstrControl.td | 26 +++++++++++++++------ lib/Target/X86/X86InstrInfo.cpp | 1 + lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86MCInstLower.cpp | 7 ++++++ test/CodeGen/X86/tail-call-win64.ll | 36 +++++++++++++++++++++++++++++ test/CodeGen/X86/tailcall-64.ll | 4 ++-- 8 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/X86/tail-call-win64.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index f64e5d45dfb..930163c3688 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1123,10 +1123,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. + bool IsWin64 = STI.isTargetWin64(); if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)); + unsigned Op = (RetOpcode == X86::TCRETURNdi) + ? X86::TAILJMPd + : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1136,14 +1138,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, JumpTarget.getTargetFlags()); } } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) - ? X86::TAILJMPm : X86::TAILJMPm64)); + unsigned Op = (RetOpcode == X86::TCRETURNmi) + ? X86::TAILJMPm + : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op)); for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). - addReg(JumpTarget.getReg(), RegState::Kill); + BuildMI(MBB, MBBI, DL, + TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64)) + .addReg(JumpTarget.getReg(), RegState::Kill); } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f1032887051..cabeba6ce34 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21441,6 +21441,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::TAILJMPd64: case X86::TAILJMPr64: case X86::TAILJMPm64: + case X86::TAILJMPd64_REX: + case X86::TAILJMPr64_REX: + case X86::TAILJMPm64_REX: llvm_unreachable("TAILJMP64 would not be touched here."); case X86::TCRETURNdi64: case X86::TCRETURNri64: diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index e1c36a82119..6ab961f04ec 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -240,13 +240,13 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, // mcinst. def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), (ins i32imm_pcrel:$dst), - "jmp\t$dst # TAILCALL", + "jmp\t$dst", [], IIC_JMP_REL>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), - "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>; } @@ -290,13 +290,25 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, def TCRETURNmi64 : PseudoI<(outs), (ins i64mem_TC:$dst, i32imm:$offset), []>; - def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst), - "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>; + def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), + "jmp\t$dst", [], IIC_JMP_REL>; def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; + + // Win64 wants jumps leaving the function to have a REX_W prefix. + let hasREX_WPrefix = 1 in { + def TAILJMPd64_REX : Ii32PCRel<0xE9, RawFrm, (outs), + (ins i64i32imm_pcrel:$dst), + "rex64 jmp\t$dst", [], IIC_JMP_REL>; + def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), + "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; + + let mayLoad = 1 in + def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), + "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; + } } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 60b0cb8f96b..24a7588870c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -353,6 +353,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 327727a9ed8..e850ebdd355 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -795,6 +795,7 @@ def Not16BitMode : Predicate<"!Subtarget->is16Bit()">, def In32BitMode : Predicate<"Subtarget->is32Bit()">, AssemblerPredicate<"Mode32Bit", "32-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; +def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 5436fc27ba9..99b1aa2e40c 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -509,6 +509,7 @@ ReSimplify: // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. case X86::TAILJMPr64: + case X86::TAILJMPr64_REX: case X86::CALL64r: case X86::CALL64pcrel32: { unsigned Opcode = OutMI.getOpcode(); @@ -1010,8 +1011,14 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; } case X86::TAILJMPr: + case X86::TAILJMPm: case X86::TAILJMPd: + case X86::TAILJMPr64: + case X86::TAILJMPm64: case X86::TAILJMPd64: + case X86::TAILJMPr64_REX: + case X86::TAILJMPm64_REX: + case X86::TAILJMPd64_REX: // Lower these as normal, but add some comments. OutStreamer.AddComment("TAILCALL"); break; diff --git a/test/CodeGen/X86/tail-call-win64.ll b/test/CodeGen/X86/tail-call-win64.ll new file mode 100644 index 00000000000..23e9280e772 --- /dev/null +++ b/test/CodeGen/X86/tail-call-win64.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=x86_64-windows -show-mc-encoding < %s | FileCheck %s + +; The Win64 ABI wants tail jmps to use a REX_W prefix so it can distinguish +; in-function jumps from function exiting jumps. + +define void @tail_jmp_reg(i32, i32, void ()* %fptr) { + tail call void ()* %fptr() + ret void +} + +; Check that we merge the REX prefixes into 0x49 instead of 0x48, 0x41. + +; CHECK-LABEL: tail_jmp_reg: +; CHECK: rex64 jmpq *%r8 +; CHECK: encoding: [0x49,0xff,0xe0] + +declare void @tail_tgt() + +define void @tail_jmp_imm() { + tail call void @tail_tgt() + ret void +} + +; CHECK-LABEL: tail_jmp_imm: +; CHECK: rex64 jmp tail_tgt + +@g_fptr = global void ()* @tail_tgt + +define void @tail_jmp_mem() { + %fptr = load void ()** @g_fptr + tail call void ()* %fptr() + ret void +} + +; CHECK-LABEL: tail_jmp_mem: +; CHECK: rex64 jmpq *g_fptr(%rip) diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index deab1dcc7eb..25d3802ab61 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -182,7 +182,7 @@ define { i64, i64 } @crash(i8* %this) { ; Check that we can fold an indexed load into a tail call instruction. ; CHECK: fold_indexed_load ; CHECK: leaq (%rsi,%rsi,4), %[[RAX:r..]] -; CHECK: jmpq *16(%{{r..}},%[[RAX]],8) # TAILCALL +; CHECK: jmpq *16(%{{r..}},%[[RAX]],8) ## TAILCALL %struct.funcs = type { i32 (i8*, i32*, i32)*, i32 (i8*)*, i32 (i8*)*, i32 (i8*, i32)*, i32 } @func_table = external global [0 x %struct.funcs] define void @fold_indexed_load(i8* %mbstr, i64 %idxprom) nounwind uwtable ssp { @@ -207,7 +207,7 @@ entry: ; } ; ; CHECK-LABEL: rdar12282281 -; CHECK: jmpq *%r11 # TAILCALL +; CHECK: jmpq *%r11 ## TAILCALL @funcs = external constant [0 x i32 (i8*, ...)*] define i32 @rdar12282281(i32 %n) nounwind uwtable ssp { -- 2.34.1