From 8ba4bf96a98707397f250b511abcdfebb921712e Mon Sep 17 00:00:00 2001 From: "Arnaud A. de Grandmaison" Date: Fri, 20 Nov 2015 21:54:27 +0000 Subject: [PATCH] [ShrinkWrap] Teach ShrinkWrap to handle targets requiring a register scavenger. The included test only checks for a compiler crash for now. Several people are facing this issue, so we first resolve the crash, and will increase shrinkwrap's coverage later in a follow-up patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253718 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ShrinkWrap.cpp | 37 +++--- test/CodeGen/AArch64/shrink-wrap.ll | 184 ++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 15 deletions(-) create mode 100755 test/CodeGen/AArch64/shrink-wrap.ll diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 07371f66061..bcbe528bb31 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -63,6 +63,7 @@ #include "llvm/CodeGen/Passes.h" // To know about callee-saved. #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" // To query the target about frame lowering. @@ -130,15 +131,15 @@ class ShrinkWrap : public MachineFunctionPass { /// \brief Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. 
- bool useOrDefCSROrFI(const MachineInstr &MI) const; + bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; - const SetOfRegs &getCurrentCSRs() const { + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { BitVector SavedRegs; const TargetFrameLowering *TFI = MachineFunc->getSubtarget().getFrameLowering(); - TFI->determineCalleeSaves(*MachineFunc, SavedRegs, nullptr); + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); for (int Reg = SavedRegs.find_first(); Reg != -1; Reg = SavedRegs.find_next(Reg)) @@ -152,7 +153,7 @@ class ShrinkWrap : public MachineFunctionPass { /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. - void updateSaveRestorePoints(MachineBasicBlock &MBB); + void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); /// \brief Initialize the pass for \p MF. void init(MachineFunction &MF) { @@ -180,7 +181,7 @@ class ShrinkWrap : public MachineFunctionPass { /// \brief Check if shrink wrapping is enabled for this target and function. static bool isShrinkWrapEnabled(const MachineFunction &MF); - + public: static char ID; @@ -218,7 +219,8 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) -bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, + RegScavenger *RS) const { if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); @@ -235,7 +237,7 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. 
- for (unsigned Reg : getCurrentCSRs()) { + for (unsigned Reg : getCurrentCSRs(RS)) { if (MO.clobbersPhysReg(Reg)) { UseOrDefCSR = true; break; @@ -264,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, return IDom; } -void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, + RegScavenger *RS) { // Get rid of the easy cases first. if (!Save) Save = &MBB; @@ -285,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator)) + if (!useOrDefCSROrFI(Terminator, RS)) continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { @@ -331,7 +334,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { // Push Save outside of this loop if immediate dominator is different - // from save block. If immediate dominator is not different, bail out. + // from save block. If immediate dominator is not different, bail out. MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); if (IDom != Save) Save = IDom; @@ -357,12 +360,12 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { // then we are stuck in a program with an infinite loop. // In that case, we will not find a safe point, hence, bail out. if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) - Restore = IPdom; + Restore = IPdom; else { Restore = nullptr; break; } - } + } } } } @@ -375,6 +378,10 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { init(MF); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr<RegScavenger> RS( + TRI->requiresRegisterScavenging(MF) ? 
new RegScavenger() : nullptr); + for (MachineBasicBlock &MBB : MF) { DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); @@ -385,11 +392,11 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { } for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI)) + if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB); + updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. if (!ArePointsInteresting()) { @@ -441,7 +448,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB); + updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { diff --git a/test/CodeGen/AArch64/shrink-wrap.ll b/test/CodeGen/AArch64/shrink-wrap.ll new file mode 100755 index 00000000000..ea101a8da15 --- /dev/null +++ b/test/CodeGen/AArch64/shrink-wrap.ll @@ -0,0 +1,184 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s + +; Regression test for a crash in the ShrinkWrap pass not handling targets +; requiring a register scavenger. 
+ +%type1 = type { i32, i32, i32 } + +@g1 = external unnamed_addr global i32, align 4 +@g2 = external unnamed_addr global i1 +@g3 = external unnamed_addr global [144 x i32], align 4 +@g4 = external unnamed_addr constant [144 x i32], align 4 +@g5 = external unnamed_addr constant [144 x i32], align 4 +@g6 = external unnamed_addr constant [144 x i32], align 4 +@g7 = external unnamed_addr constant [144 x i32], align 4 +@g8 = external unnamed_addr constant [144 x i32], align 4 +@g9 = external unnamed_addr constant [144 x i32], align 4 +@g10 = external unnamed_addr constant [144 x i32], align 4 +@g11 = external unnamed_addr global i32, align 4 +@g12 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g13 = external unnamed_addr global %type1*, align 8 +@g14 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g15 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g16 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g17 = external unnamed_addr global [62 x i32], align 4 +@g18 = external unnamed_addr global i32, align 4 +@g19 = external unnamed_addr constant [144 x i32], align 4 +@g20 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g21 = external unnamed_addr global i32, align 4 + +declare fastcc i32 @foo() + +declare fastcc i32 @bar() + +define internal fastcc i32 @func(i32 %alpha, i32 %beta) { +entry: + %v1 = alloca [2 x [11 x i32]], align 4 + %v2 = alloca [11 x i32], align 16 + %v3 = alloca [11 x i32], align 16 + switch i32 undef, label %if.end.9 [ + i32 4, label %if.then.6 + i32 3, label %if.then.2 + ] + +if.then.2: + %call3 = tail call fastcc i32 @bar() + br label %cleanup + +if.then.6: + %call7 = tail call fastcc i32 @foo() + unreachable + +if.end.9: + %tmp = load i32, i32* @g1, align 4 + %rem.i = urem i32 %tmp, 1000000 + %idxprom.1.i = zext i32 %rem.i to i64 + %tmp1 = load %type1*, %type1** @g13, align 8 + %v4 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 0 + %.b = load i1, i1* @g2, align 1 
+ %v5 = select i1 %.b, i32 2, i32 0 + %tmp2 = load i32, i32* @g18, align 4 + %tmp3 = load i32, i32* @g11, align 4 + %idxprom58 = sext i32 %tmp3 to i64 + %tmp4 = load i32, i32* @g21, align 4 + %idxprom69 = sext i32 %tmp4 to i64 + br label %for.body + +for.body: + %v6 = phi i32 [ 0, %if.end.9 ], [ %v7, %for.inc ] + %a.0983 = phi i32 [ 1, %if.end.9 ], [ %a.1, %for.inc ] + %arrayidx = getelementptr inbounds [62 x i32], [62 x i32]* @g17, i64 0, i64 undef + %tmp5 = load i32, i32* %arrayidx, align 4 + br i1 undef, label %for.inc, label %if.else.51 + +if.else.51: + %idxprom53 = sext i32 %tmp5 to i64 + %arrayidx54 = getelementptr inbounds [144 x i32], [144 x i32]* @g3, i64 0, i64 %idxprom53 + %tmp6 = load i32, i32* %arrayidx54, align 4 + switch i32 %tmp6, label %for.inc [ + i32 1, label %block.bb + i32 10, label %block.bb.159 + i32 7, label %block.bb.75 + i32 8, label %block.bb.87 + i32 9, label %block.bb.147 + i32 12, label %block.bb.111 + i32 3, label %block.bb.123 + i32 4, label %block.bb.135 + ] + +block.bb: + %arrayidx56 = getelementptr inbounds [144 x i32], [144 x i32]* @g6, i64 0, i64 %idxprom53 + %tmp7 = load i32, i32* %arrayidx56, align 4 + %shr = ashr i32 %tmp7, %v5 + %add57 = add nsw i32 %shr, 0 + %arrayidx61 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g14, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp8 = load i8, i8* %arrayidx61, align 1 + %conv = zext i8 %tmp8 to i32 + %add62 = add nsw i32 %conv, %add57 + br label %for.inc + +block.bb.75: + %arrayidx78 = getelementptr inbounds [144 x i32], [144 x i32]* @g10, i64 0, i64 %idxprom53 + %tmp9 = load i32, i32* %arrayidx78, align 4 + %shr79 = ashr i32 %tmp9, %v5 + %add80 = add nsw i32 %shr79, 0 + %add86 = add nsw i32 0, %add80 + br label %for.inc + +block.bb.87: + %arrayidx90 = getelementptr inbounds [144 x i32], [144 x i32]* @g9, i64 0, i64 %idxprom53 + %tmp10 = load i32, i32* %arrayidx90, align 4 + %shr91 = ashr i32 %tmp10, 0 + %sub92 = sub nsw i32 0, %shr91 + %arrayidx96 = getelementptr 
inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g15, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp11 = load i8, i8* %arrayidx96, align 1 + %conv97 = zext i8 %tmp11 to i32 + %sub98 = sub nsw i32 %sub92, %conv97 + br label %for.inc + +block.bb.111: + %arrayidx114 = getelementptr inbounds [144 x i32], [144 x i32]* @g19, i64 0, i64 %idxprom53 + %tmp12 = load i32, i32* %arrayidx114, align 4 + %shr115 = ashr i32 %tmp12, 0 + %sub116 = sub nsw i32 0, %shr115 + %arrayidx120 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g12, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp13 = load i8, i8* %arrayidx120, align 1 + %conv121 = zext i8 %tmp13 to i32 + %sub122 = sub nsw i32 %sub116, %conv121 + br label %for.inc + +block.bb.123: + %arrayidx126 = getelementptr inbounds [144 x i32], [144 x i32]* @g5, i64 0, i64 %idxprom53 + %tmp14 = load i32, i32* %arrayidx126, align 4 + %shr127 = ashr i32 %tmp14, %v5 + %add128 = add nsw i32 %shr127, 0 + %add134 = add nsw i32 0, %add128 + br label %for.inc + +block.bb.135: + %arrayidx138 = getelementptr inbounds [144 x i32], [144 x i32]* @g4, i64 0, i64 %idxprom53 + %tmp15 = load i32, i32* %arrayidx138, align 4 + %shr139 = ashr i32 %tmp15, 0 + %sub140 = sub nsw i32 0, %shr139 + %arrayidx144 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g20, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp16 = load i8, i8* %arrayidx144, align 1 + %conv145 = zext i8 %tmp16 to i32 + %sub146 = sub nsw i32 %sub140, %conv145 + br label %for.inc + +block.bb.147: + %arrayidx150 = getelementptr inbounds [144 x i32], [144 x i32]* @g8, i64 0, i64 %idxprom53 + %tmp17 = load i32, i32* %arrayidx150, align 4 + %shr151 = ashr i32 %tmp17, %v5 + %add152 = add nsw i32 %shr151, 0 + %arrayidx156 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g16, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp18 = load i8, i8* %arrayidx156, align 1 + %conv157 = zext i8 %tmp18 to i32 + %add158 = add nsw i32 %conv157, %add152 + br label %for.inc + 
+block.bb.159: + %sub160 = add nsw i32 %v6, -450 + %arrayidx162 = getelementptr inbounds [144 x i32], [144 x i32]* @g7, i64 0, i64 %idxprom53 + %tmp19 = load i32, i32* %arrayidx162, align 4 + %shr163 = ashr i32 %tmp19, 0 + %sub164 = sub nsw i32 %sub160, %shr163 + %sub170 = sub nsw i32 %sub164, 0 + br label %for.inc + +for.inc: + %v7 = phi i32 [ %v6, %for.body ], [ %v6, %if.else.51 ], [ %sub170, %block.bb.159 ], [ %add158, %block.bb.147 ], [ %sub146, %block.bb.135 ], [ %add134, %block.bb.123 ], [ %sub122, %block.bb.111 ], [ %sub98, %block.bb.87 ], [ %add86, %block.bb.75 ], [ %add62, %block.bb ] + %a.1 = phi i32 [ %a.0983, %for.body ], [ undef, %if.else.51 ], [ undef, %block.bb.159 ], [ undef, %block.bb.147 ], [ undef, %block.bb.135 ], [ undef, %block.bb.123 ], [ undef, %block.bb.111 ], [ undef, %block.bb.87 ], [ undef, %block.bb.75 ], [ undef, %block.bb ] + %cmp48 = icmp sgt i32 %a.1, %tmp2 + br i1 %cmp48, label %for.end, label %for.body + +for.end: + store i32 %tmp, i32* %v4, align 4 + %hold_hash.i.7 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 1 + store i32 0, i32* %hold_hash.i.7, align 4 + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %call3, %if.then.2 ], [ undef, %for.end ] + ret i32 %retval.0 +} -- 2.34.1