From b0b27c572556a67b2a9f5246b8ceeeb0895c6eec Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 1 Dec 2015 19:49:31 +0000 Subject: [PATCH] [X86] Make sure the prologue does not clobber EFLAGS when it lives accross it. This fixes PR25629. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 33 ++++++++- test/CodeGen/X86/x86-shrink-wrapping.ll | 91 +++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index f4f7f0cf33b..4ce3dfe0dcb 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -297,6 +297,28 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, } } +// Check if \p MBB defines the flags register before the first terminator. +static bool flagsDefinedLocally(const MachineBasicBlock &MBB) { + MachineBasicBlock::const_iterator FirstTerminator = MBB.getFirstTerminator(); + for (MachineBasicBlock::const_iterator MII : MBB) { + if (MII == FirstTerminator) + return false; + + for (const MachineOperand &MO : MII->operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) + continue; + + // This instruction sets EFLAGS. + if (MO.isDef()) + return true; + } + } + return false; +} + MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, int64_t Offset, bool InEpilogue) const { @@ -306,7 +328,16 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( // is tricky. bool UseLEA; if (!InEpilogue) { - UseLEA = STI.useLeaForSP(); + // Check if inserting the prologue at the beginning + // of MBB would require using LEA operations. + // We need to use LEA operations if both conditions are true: + // 1. One of the terminators needs the flags. + // 2. 
The flags are not defined after the insertion point of the prologue. + // Note: Checking for the predecessors is a shortcut when obviously nothing + // will live across the prologue. + UseLEA = STI.useLeaForSP() || + (!MBB.pred_empty() && terminatorsNeedFlagsAsInput(MBB) && + !flagsDefinedLocally(MBB)); } else { // If we can use LEA for SP but we shouldn't, check that none // of the terminators uses the eflags. Otherwise we will insert diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll index 0cab17f9de8..34e56919468 100644 --- a/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -788,3 +788,94 @@ end: %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ] ret i32 %tmp.0 } + +@b = internal unnamed_addr global i1 false +@c = internal unnamed_addr global i8 0, align 1 +@a = common global i32 0, align 4 + +; Make sure the prologue does not clobber the EFLAGS when +; it is live across. +; PR25629. +; Note: The registers may change in the following patterns, but +; because they imply register hierarchy (e.g., eax, al) it is +; tricky to write robust patterns. +; +; CHECK-LABEL: useLEAForPrologue: +; +; Prologue is at the beginning of the function when shrink-wrapping +; is disabled. +; DISABLE: pushq +; The stack adjustment can use SUB instr because we do not need to +; preserve the EFLAGS at this point. +; DISABLE-NEXT: subq $16, %rsp +; +; Load the value of b. +; CHECK: movb _b(%rip), [[BOOL:%cl]] +; Extract i1 from the loaded value. +; CHECK-NEXT: andb $1, [[BOOL]] +; Create the zero value for the select assignment. +; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]] +; CHECK-NEXT: testb [[BOOL]], [[BOOL]] +; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]] +; +; CHECK: movb $48, [[CMOVE_VAL:%al]] +; +; CHECK: [[STOREC_LABEL]]: +; +; ENABLE-NEXT: pushq +; For the stack adjustment, we need to preserve the EFLAGS. 
+; ENABLE-NEXT: leaq -16(%rsp), %rsp +; +; Technically, we should use CMOVE_VAL here or its subregister. +; CHECK-NEXT: movb %al, _c(%rip) +; testb set the EFLAGS read here. +; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]] +; +; The code of the loop is not interesting. +; [...] +; +; CHECK: [[VARFUNC_CALL]]: +; Set the null parameter. +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq _varfunc +; +; Set the return value. +; CHECK-NEXT: xorl %eax, %eax +; +; Epilogue code. +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: popq +; CHECK-NEXT: retq +define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 { +entry: + %tmp = alloca i3 + %.b = load i1, i1* @b, align 1 + %bool = select i1 %.b, i8 0, i8 48 + store i8 %bool, i8* @c, align 1 + br i1 %.b, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + tail call void asm sideeffect "nop", "~{ebx}"() + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ] + %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ] + %cmp2 = icmp slt i32 %d, %cond5 + %conv3 = zext i1 %cmp2 to i32 + %inc = add i8 %inc6, 1 + %cmp = icmp slt i8 %inc, 45 + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + store i32 %conv3, i32* @a, align 4 + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %call = tail call i32 (i8*) @varfunc(i8* null) + ret i32 0 +} + +declare i32 @varfunc(i8* nocapture readonly) + +attributes #3 = { nounwind } -- 2.34.1