From: Quentin Colombet Date: Wed, 22 Jul 2015 16:34:37 +0000 (+0000) Subject: [ARM] Make the frame lowering code ready for shrink-wrapping. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=c59ca90970a7e634571550cba7519836afd78e55;p=oota-llvm.git [ARM] Make the frame lowering code ready for shrink-wrapping. Shrink-wrapping can now be tested on ARM with -enable-shrink-wrap. Related to git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242908 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 7b2369463e8..ba0d5f25b09 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -288,7 +288,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -1842,7 +1841,6 @@ void ARMFrameLowering::adjustForSegmentedStacks( if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); - assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 79883c100fa..46b31c350a0 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -85,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); 
MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -385,135 +384,156 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } + if (needPopSpecialFixUp(MF)) { + bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); + (void)Done; + assert(Done && "Emission of the special fixup failed!?"); + } +} + +bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + if (!needPopSpecialFixUp(*MBB.getParent())) + return true; + + MachineBasicBlock *TmpMBB = const_cast(&MBB); + return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); +} + +bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { + ARMFunctionInfo *AFI = + const_cast(&MF)->getInfo(); + if (AFI->getArgRegsSaveSize()) + return true; + bool IsV4PopReturn = false; - for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) IsV4PopReturn = true; - IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); - - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. So instead - // we have to emit: - // POP {r3} - // ADD sp, #offset - // BX r3 - // If this would clobber a return value, then generate this sequence instead: - // MOV ip, r3 - // POP {r3} - // ADD sp, #offset - // MOV lr, r3 - // MOV r3, ip - // BX lr - if (ArgRegsSaveSize || IsV4PopReturn) { - // If MBBI is a return instruction, we may be able to directly restore - // LR in the PC. - // This is possible if we do not need to emit any SP update. - // Otherwise, we need a temporary register to pop the value - // and copy that value into LR. 
- MBBI = MBB.getFirstTerminator(); - if (!ArgRegsSaveSize && MBBI != MBB.end() && - MBBI->getOpcode() == ARM::tBX_RET) { - MachineInstrBuilder MIB = - AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))) - .addReg(ARM::PC, RegState::Define); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - return; - } + return IsV4PopReturn && STI.hasV4TOps() && !STI.hasV5TOps(); +} - // Look for a temporary register to use. - // First, compute the liveness information. - LivePhysRegs UsedRegs(STI.getRegisterInfo()); - UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); - // The semantic of pristines changed recently and now, - // the callee-saved registers that are touched in the function - // are not part of the pristines set anymore. - // Add those callee-saved now. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); - for (unsigned i = 0; CSRegs[i]; ++i) - UsedRegs.addReg(CSRegs[i]); - - DebugLoc dl = DebugLoc(); - if (MBBI != MBB.end()) { - dl = MBBI->getDebugLoc(); - auto InstUpToMBBI = MBB.end(); - // The post-decrement is on purpose here. - // We want to have the liveness right before MBBI. - while (InstUpToMBBI-- != MBBI) - UsedRegs.stepBackward(*InstUpToMBBI); - } +bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, + bool DoIt) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ThumbRegisterInfo *RegInfo = + static_cast(STI.getRegisterInfo()); - // Look for a register that can be directly use in the POP. - unsigned PopReg = 0; - // And some temporary register, just in case. 
- unsigned TemporaryReg = 0; - BitVector PopFriendly = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); - assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); - // Rebuild the GPRs from the high registers because they are removed - // form the GPR reg class for thumb1. - BitVector GPRsNoLRSP = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); - GPRsNoLRSP |= PopFriendly; - GPRsNoLRSP.reset(ARM::LR); - GPRsNoLRSP.reset(ARM::SP); - GPRsNoLRSP.reset(ARM::PC); - for (int Register = GPRsNoLRSP.find_first(); Register != -1; - Register = GPRsNoLRSP.find_next(Register)) { - if (!UsedRegs.contains(Register)) { - // Remember the first pop-friendly register and exit. - if (PopFriendly.test(Register)) { - PopReg = Register; - TemporaryReg = 0; - break; - } - // Otherwise, remember that the register will be available to - // save a pop-friendly register. - TemporaryReg = Register; - } - } + // If MBBI is a return instruction, we may be able to directly restore + // LR in the PC. + // This is possible if we do not need to emit any SP update. + // Otherwise, we need a temporary register to pop the value + // and copy that value into LR. + auto MBBI = MBB.getFirstTerminator(); + if (!ArgRegsSaveSize && MBBI != MBB.end() && + MBBI->getOpcode() == ARM::tBX_RET) { + if (!DoIt) + return true; + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))) + .addReg(ARM::PC, RegState::Define); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + return true; + } - assert((PopReg || TemporaryReg) && "Cannot get LR"); + // Look for a temporary register to use. + // First, compute the liveness information. 
+ LivePhysRegs UsedRegs(STI.getRegisterInfo()); + UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); + // The semantic of pristines changed recently and now, + // the callee-saved registers that are touched in the function + // are not part of the pristines set anymore. + // Add those callee-saved now. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + UsedRegs.addReg(CSRegs[i]); + + DebugLoc dl = DebugLoc(); + if (MBBI != MBB.end()) { + dl = MBBI->getDebugLoc(); + auto InstUpToMBBI = MBB.end(); + // The post-decrement is on purpose here. + // We want to have the liveness right before MBBI. + while (InstUpToMBBI-- != MBBI) + UsedRegs.stepBackward(*InstUpToMBBI); + } - if (TemporaryReg) { - assert(!PopReg && "Unnecessary MOV is about to be inserted"); - PopReg = PopFriendly.find_first(); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(TemporaryReg, RegState::Define) - .addReg(PopReg, RegState::Kill)); + // Look for a register that can be directly used in the POP. + unsigned PopReg = 0; + // And some temporary register, just in case. + unsigned TemporaryReg = 0; + BitVector PopFriendly = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); + // Rebuild the GPRs from the high registers because they are removed + // from the GPR reg class for thumb1. + BitVector GPRsNoLRSP = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + GPRsNoLRSP |= PopFriendly; + GPRsNoLRSP.reset(ARM::LR); + GPRsNoLRSP.reset(ARM::SP); + GPRsNoLRSP.reset(ARM::PC); + for (int Register = GPRsNoLRSP.find_first(); Register != -1; + Register = GPRsNoLRSP.find_next(Register)) { + if (!UsedRegs.contains(Register)) { + // Remember the first pop-friendly register and exit. 
+ if (PopFriendly.test(Register)) { + PopReg = Register; + TemporaryReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TemporaryReg = Register; } + } - assert(PopReg && "Do not know how to get LR"); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(PopReg, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - if (!TemporaryReg && MBBI != MBB.end() && - MBBI->getOpcode() == ARM::tBX_RET) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(PopReg, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - return; - } + if (!DoIt && !PopReg && !TemporaryReg) + return false; + assert((PopReg || TemporaryReg) && "Cannot get LR"); + + if (TemporaryReg) { + assert(!PopReg && "Unnecessary MOV is about to be inserted"); + PopReg = PopFriendly.find_first(); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) + .addReg(TemporaryReg, RegState::Define) .addReg(PopReg, RegState::Kill)); + } - if (TemporaryReg) { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(PopReg, RegState::Define) - .addReg(TemporaryReg, RegState::Kill)); - } + assert(PopReg && "Do not know how to get LR"); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(PopReg, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill); + AddDefaultPred(MIB); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + return true; + } + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + + 
if (TemporaryReg) { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill)); } + + return true; } bool Thumb1FrameLowering:: @@ -567,7 +587,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index 31d57325ebd..812f9830824 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -45,6 +45,42 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + + /// Check whether or not the given \p MBB can be used as an epilogue + /// for the target. + /// The epilogue will be inserted before the first terminator of that block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + +private: + /// Check if the frame lowering of \p MF needs a special fixup + /// code sequence for the epilogue. + /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore + /// to LR, and we can't pop the value directly to the PC when + /// we need to update the SP after popping the value. So instead + /// we have to emit: + /// POP {r3} + /// ADD sp, #offset + /// BX r3 + /// If this would clobber a return value, then generate this sequence instead: + /// MOV ip, r3 + /// POP {r3} + /// ADD sp, #offset + /// MOV lr, r3 + /// MOV r3, ip + /// BX lr + bool needPopSpecialFixUp(const MachineFunction &MF) const; + + /// Emit the special fixup code sequence for the epilogue. 
+ /// \see needPopSpecialFixUp for more details. + /// \p DoIt, tells this method whether or not to actually insert + /// the code sequence in \p MBB. I.e., when \p DoIt is false, + /// \p MBB is left untouched. + /// \returns For \p DoIt == true: True when the emission succeeded + /// false otherwise. For \p DoIt == false: True when the emission + /// would have been possible, false otherwise. + bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const; }; } // End llvm namespace diff --git a/test/CodeGen/ARM/arm-shrink-wrapping.ll b/test/CodeGen/ARM/arm-shrink-wrapping.ll new file mode 100644 index 00000000000..f033ac47309 --- /dev/null +++ b/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -0,0 +1,536 @@ +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE + +; +; Note: Lots of tests use inline asm instead of regular calls. +; This allows to have a better control on what the allocation will do. +; Otherwise, we may have spill right in the entry block, defeating +; shrink-wrapping. Moreover, some of the inline asm statements (nop) +; are here to ensure that the related paths do not end up as critical +; edges. 
+; Also disable the late if-converter as it makes harder to reason on +; the diffs. + +; Initial motivating example: Simple diamond with a call just on one side. +; CHECK-LABEL: foo: +; +; Compare the arguments and jump to exit. +; No prologue needed. +; ENABLE: cmp r0, r1 +; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: push {r7, lr} +; CHECK-NEXT: mov r7, sp +;; +; Compare the arguments and jump to exit. +; After the prologue is set. +; DISABLE: sub sp +; DISABLE: cmp r0, r1 +; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Store %a in the alloca. +; ARM-ENABLE: push {r0} +; THUMB-ENABLE: str r0, [sp, #-4] +; DISABLE: str r0, [sp] +; Set the alloca address in the second argument. +; CHECK-NEXT: mov r1, sp +; Set the first argument to zero. +; CHECK-NEXT: mov{{s?}} r0, #0 +; CHECK-NEXT: bl{{x?}} _doSomething +; +; With shrink-wrapping, epilogue is just after the call. +; ARM-ENABLE-NEXT: mov sp, r7 +; THUMB-ENABLE-NEXT: add sp, #4 +; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr} +; +; CHECK: [[EXIT_LABEL]]: +; +; Without shrink-wrapping, epilogue is in the exit block. +; Epilogue code. (What we pop does not matter.) +; ARM-DISABLE: mov sp, r7 +; THUMB-DISABLE: add sp, +; DISABLE-NEXT: pop {r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @foo(i32 %a, i32 %b) { + %tmp = alloca i32, align 4 + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) + br label %false + +false: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] + ret i32 %tmp.0 +} + +; Function Attrs: optsize +declare i32 @doSomething(i32, i32*) + + +; Check that we do not perform the restore inside the loop whereas the save +; is outside. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; +; Shrink-wrapping allows to skip the prologue in the else case. 
+; ARM-ENABLE: cmp r0, #0 +; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, r7, lr} +; CHECK-NEXT: add r7, sp, #4 +; +; ARM-DISABLE: cmp r0, #0 +; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: mov{{s?}} [[SUM:r0]], #0 +; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; ARM: subs [[IV]], [[IV]], #1 +; THUMB: subs [[IV]], #1 +; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]] +; THUMB-NEXT: add [[SUM]], [[TMP]] +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; SUM << 3. +; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 +; ENABLE-NEXT: pop {r4, r7, pc} +; +; Duplicated epilogue. +; DISABLE: pop {r4, r7, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. 
+; CHECK: lsl{{s?}} r0, r1, #1 +; DISABLE-NEXT: pop {r4, r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] + %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare i32 @something(...) + +; Check that we do not perform the shrink-wrapping inside the loop even +; though that would be legal. The cost model must prevent that. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4 +; CHECK: mov{{s?}} [[SUM:r0]], #0 +; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; CHECK: nop +; Next BB. +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body +; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; ARM: subs [[IV]], [[IV]], #1 +; THUMB: subs [[IV]], #1 +; ARM: add [[SUM]], [[TMP]], [[SUM]] +; THUMB: add [[SUM]], [[TMP]] +; CHECK-NEXT: bne [[LOOP_LABEL]] +; Next BB. 
+; CHECK: @ %for.exit +; CHECK: nop +; CHECK: pop {r4 +define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +entry: + br label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] + %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] + %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.03 + %inc = add nuw nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: + tail call void asm "nop", ""() + br label %for.end + +for.end: ; preds = %for.body + ret i32 %add +} + +; Check with a more complex case that we do not have save within the loop and +; restore outside. +; CHECK-LABEL: loopInfoSaveOutsideLoop: +; +; ARM-ENABLE: cmp r0, #0 +; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, r7, lr} +; CHECK-NEXT: add r7, sp, #4 +; +; ARM-DISABLE: cmp r0, #0 +; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: mov{{s?}} [[SUM:r0]], #0 +; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; ARM: subs [[IV]], [[IV]], #1 +; THUMB: subs [[IV]], #1 +; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]] +; THUMB-NEXT: add [[SUM]], [[TMP]] +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; SUM << 3. +; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 +; ENABLE: pop {r4, r7, pc} +; +; Duplicated epilogue. +; DISABLE: pop {r4, r7, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. 
+; CHECK: lsl{{s?}} r0, r1, #1 +; DISABLE-NEXT: pop {r4, r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] + %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + tail call void asm "nop", "~{r4}"() + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare void @somethingElse(...) + +; Check with a more complex case that we do not have restore within the loop and +; save outside. +; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; +; ARM-ENABLE: cmp r0, #0 +; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, r7, lr} +; CHECK-NEXT: add r7, sp, #4 +; +; ARM-DISABLE: cmp r0, #0 +; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: mov{{s?}} [[SUM:r0]], #0 +; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; +; Next BB. 
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; ARM: subs [[IV]], [[IV]], #1 +; THUMB: subs [[IV]], #1 +; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]] +; THUMB-NEXT: add [[SUM]], [[TMP]] +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; SUM << 3. +; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 +; ENABLE-NEXT: pop {r4, r7, pc} +; +; Duplicated epilogue. +; DISABLE: pop {r4, r7, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsl{{s?}} r0, r1, #1 +; DISABLE-NEXT: pop {r4, r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void asm "nop", "~{r4}"() + br label %for.body + +for.body: ; preds = %for.body, %if.then + %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] + %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] + %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +; Check that we handle function with no frame information correctly. +; CHECK-LABEL: emptyFrame: +; CHECK: @ %entry +; CHECK-NEXT: mov{{s?}} r0, #0 +; CHECK-NEXT: bx lr +define i32 @emptyFrame() { +entry: + ret i32 0 +} + +; Check that we handle inline asm correctly. +; CHECK-LABEL: inlineAsm: +; +; ARM-ENABLE: cmp r0, #0 +; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. 
+; CHECK: push {r4, r7, lr} +; CHECK-NEXT: add r7, sp, #4 +; +; ARM-DISABLE: cmp r0, #0 +; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; ARM: subs [[IV]], [[IV]], #1 +; THUMB: subs [[IV]], #1 +; CHECK: add{{(\.w)?}} r4, r4, #1 +; CHECK: bne [[LOOP]] +; +; Next BB. +; CHECK: mov{{s?}} r0, #0 +; +; Duplicated epilogue. +; DISABLE: pop {r4, r7, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsl{{s?}} r0, r1, #1 +; DISABLE-NEXT: pop {r4, r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @inlineAsm(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + tail call void asm sideeffect "add r4, #1", "~{r4}"() + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: + tail call void asm "nop", ""() + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %for.body, %if.else + %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ] + ret i32 %sum.0 +} + +; Check that we handle calls to variadic functions correctly. +; CHECK-LABEL: callVariadicFunc: +; +; ARM-ENABLE: cmp r0, #0 +; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: push {r7, lr} +; CHECK-NEXT: mov r7, sp +; CHECK-NEXT: sub sp, {{(sp, )?}}#12 +; +; ARM-DISABLE: cmp r0, #0 +; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Setup of the varargs. 
+; CHECK: mov r0, r1 +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r3, r1 +; ARM-NEXT: str r1, [sp] +; ARM-NEXT: str r1, [sp, #4] +; THUMB-NEXT: strd r1, r1, [sp] +; CHECK-NEXT: str r1, [sp, #8] +; CHECK-NEXT: bl{{x?}} _someVariadicFunc +; CHECK-NEXT: lsl{{s?}} r0, r0, #3 +; ARM-NEXT: mov sp, r7 +; THUMB-NEXT: add sp, #12 +; CHECK-NEXT: pop {r7, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsl{{s?}} r0, r1, #1 +; +; Epilogue code. +; ENABLE-NEXT: bx lr +; +; ARM-DISABLE-NEXT: mov sp, r7 +; THUMB-DISABLE-NEXT: add sp, #12 +; DISABLE-NEXT: pop {r7, pc} +define i32 @callVariadicFunc(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) + %shl = shl i32 %call, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] + ret i32 %sum.0 +} + +declare i32 @someVariadicFunc(i32, ...) + +; Make sure we do not insert unreachable code after noreturn function. +; Although this is not incorrect to insert such code, it is useless +; and it hurts the binary size. 
+; +; CHECK-LABEL: noreturn: +; DISABLE: push +; +; CHECK: tst{{(\.w)?}} r0, #255 +; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]] +; +; CHECK: mov{{s?}} r0, #42 +; +; ENABLE-NEXT: bx lr +; +; DISABLE-NEXT: pop +;; +; CHECK: [[ABORT]]: @ %if.abort +; +; ENABLE: push +; +; CHECK: bl{{x?}} _abort +; ENABLE-NOT: pop +define i32 @noreturn(i8 signext %bad_thing) { +entry: + %tobool = icmp eq i8 %bad_thing, 0 + br i1 %tobool, label %if.end, label %if.abort + +if.abort: + %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() + tail call void @abort() #0 + unreachable + +if.end: + ret i32 42 +} + +declare void @abort() #0 + +attributes #0 = { noreturn nounwind } diff --git a/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/test/CodeGen/Thumb/thumb-shrink-wrapping.ll new file mode 100644 index 00000000000..62f78258e7d --- /dev/null +++ b/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -0,0 +1,517 @@ +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; +; Note: Lots of tests use inline asm instead of regular calls. +; This allows to have a better control on what the allocation will do. +; Otherwise, we may have spill right in the entry block, defeating +; shrink-wrapping. Moreover, some of the inline asm statements (nop) +; are here to ensure that the related paths do not end up as critical +; edges. +; Also disable the late if-converter as it makes harder to reason on +; the diffs. + +; Initial motivating example: Simple diamond with a call just on one side. +; CHECK-LABEL: foo: +; +; Compare the arguments and jump to exit. +; No prologue needed. +; ENABLE: cmp r0, r1 +; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Prologue code. 
+; CHECK: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; +; Compare the arguments and jump to exit. +; After the prologue is set. +; DISABLE: cmp r0, r1 +; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Store %a in the alloca. +; CHECK: str r0, [sp, #4] +; Set the alloca address in the second argument. +; Set the first argument to zero. +; CHECK: movs r0, #0 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: bl +; +; With shrink-wrapping, epilogue is just after the call. +; ENABLE-NEXT: add sp, #8 +; ENABLE-NEXT: pop {r7, lr} +; +; CHECK: [[EXIT_LABEL]]: +; +; Without shrink-wrapping, epilogue is in the exit block. +; Epilogue code. (What we pop does not matter.) +; DISABLE: add sp, #8 +; DISABLE-NEXT: pop {r7, pc} +; +; ENABLE-NEXT: bx lr +define i32 @foo(i32 %a, i32 %b) { + %tmp = alloca i32, align 4 + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) + br label %false + +false: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] + ret i32 %tmp.0 +} + +; Function Attrs: optsize +declare i32 @doSomething(i32, i32*) + + +; Check that we do not perform the restore inside the loop whereas the save +; is outside. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; +; Shrink-wrapping allows to skip the prologue in the else case. +; ENABLE: cmp r0, #0 +; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, lr} +; +; DISABLE: cmp r0, #0 +; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: movs [[SUM:r0]], #0 +; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: movs [[TMP:r[0-9]+]], #1 +; CHECK: adds [[SUM]], [[TMP]], [[SUM]] +; CHECK-NEXT: subs [[IV]], [[IV]], #1 +; CHECK-NEXT: cmp [[IV]], #0 +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. 
+; SUM << 3. +; CHECK: lsls [[SUM]], [[SUM]], #3 +; +; Duplicated epilogue. +; DISABLE: pop {r4, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsls r0, r1, #1 +; DISABLE-NEXT: pop {r4, pc} +; +; ENABLE-NEXT: bx lr +define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] + %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare i32 @something(...) + +; Check that we do not perform the shrink-wrapping inside the loop even +; though that would be legal. The cost model must prevent that. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4 +; This is the nop. +; CHECK: mov r8, r8 +; CHECK: movs [[SUM:r0]], #0 +; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 +; Next BB. +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body +; CHECK: movs [[TMP:r[0-9]+]], #1 +; CHECK: adds [[SUM]], [[TMP]], [[SUM]] +; CHECK-NEXT: subs [[IV]], [[IV]], #1 +; CHECK-NEXT: cmp [[IV]], #0 +; CHECK-NEXT: bne [[LOOP_LABEL]] +; Next BB. +; CHECK: @ %for.exit +; This is the nop. 
+; CHECK: mov r8, r8 +; CHECK: pop {r4 +define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +entry: + br label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] + %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] + %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.03 + %inc = add nuw nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: + tail call void asm "nop", ""() + br label %for.end + +for.end: ; preds = %for.body + ret i32 %add +} + +; Check with a more complex case that we do not have save within the loop and +; restore outside. +; CHECK-LABEL: loopInfoSaveOutsideLoop: +; +; ENABLE: cmp r0, #0 +; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, lr} +; +; DISABLE: cmp r0, #0 +; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: movs [[SUM:r0]], #0 +; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: movs [[TMP:r[0-9]+]], #1 +; CHECK: adds [[SUM]], [[TMP]], [[SUM]] +; CHECK-NEXT: subs [[IV]], [[IV]], #1 +; CHECK-NEXT: cmp [[IV]], #0 +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; SUM << 3. +; CHECK: lsls [[SUM]], [[SUM]], #3 +; ENABLE-NEXT: pop {r4, lr} +; +; Duplicated epilogue. +; DISABLE: pop {r4, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. 
+; CHECK: lsls r0, r1, #1 +; DISABLE-NEXT: pop {r4, pc} +; +; ENABLE-NEXT: bx lr +define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] + %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + tail call void asm "nop", "~{r4}"() + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare void @somethingElse(...) + +; Check with a more complex case that we do not have restore within the loop and +; save outside. +; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; +; ENABLE: cmp r0, #0 +; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, lr} +; +; DISABLE-NEXT: cmp r0, #0 +; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; SUM is in r0 because it is coalesced with the second +; argument on the else path. +; CHECK: movs [[SUM:r0]], #0 +; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: movs [[TMP:r[0-9]+]], #1 +; CHECK: adds [[SUM]], [[TMP]], [[SUM]] +; CHECK-NEXT: subs [[IV]], [[IV]], #1 +; CHECK-NEXT: cmp [[IV]], #0 +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; SUM << 3. +; CHECK: lsls [[SUM]], [[SUM]], #3 +; ENABLE: pop {r4, lr} +; +; Duplicated epilogue. 
+; DISABLE: pop {r4, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsls r0, r1, #1 +; DISABLE-NEXT: pop {r4, pc} +; +; ENABLE-NEXT: bx lr +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void asm "nop", "~{r4}"() + br label %for.body + +for.body: ; preds = %for.body, %if.then + %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] + %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] + %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +; Check that we handle function with no frame information correctly. +; CHECK-LABEL: emptyFrame: +; CHECK: @ %entry +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bx lr +define i32 @emptyFrame() { +entry: + ret i32 0 +} + +; Check that we handle inline asm correctly. +; CHECK-LABEL: inlineAsm: +; +; ENABLE: cmp r0, #0 +; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: r4. +; CHECK: push {r4, lr} +; +; DISABLE: cmp r0, #0 +; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: movs [[IV:r[0-9]+]], #10 +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body +; CHECK: movs r4, #1 +; CHECK: subs [[IV]], [[IV]], #1 +; CHECK-NEXT: cmp [[IV]], #0 +; CHECK-NEXT: bne [[LOOP]] +; +; Next BB. +; CHECK: movs r0, #0 +; ENABLE-NEXT: pop {r4, lr} +; +; Duplicated epilogue. 
+; DISABLE-NEXT: pop {r4, pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsls r0, r1, #1 +; DISABLE-NEXT: pop {r4, pc} +; +; ENABLE-NEXT: bx lr +define i32 @inlineAsm(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.preheader + +for.preheader: + tail call void asm "nop", ""() + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] + tail call void asm sideeffect "movs r4, #1", "~{r4}"() + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.exit, label %for.body + +for.exit: + tail call void asm "nop", ""() + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %for.body, %if.else + %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ] + ret i32 %sum.0 +} + +; Check that we handle calls to variadic functions correctly. +; CHECK-LABEL: callVariadicFunc: +; +; ENABLE: cmp r0, #0 +; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: push {[[TMP:r[0-9]+]], lr} +; CHECK-NEXT: sub sp, #16 +; +; DISABLE: cmp r0, #0 +; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] +; +; Setup of the varargs. +; CHECK: mov [[TMP_SP:r[0-9]+]], sp +; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]]] +; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #4] +; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #8] +; Thumb has quite a strange way of moving stuff +; around. Oh well, match the current sequence. +; CHECK: push {r1} +; CHECK-NEXT: pop {r0} +; CHECK: push {r1} +; CHECK-NEXT: pop {r2} +; CHECK: push {r1} +; CHECK-NEXT: pop {r3} +; CHECK-NEXT: bl +; CHECK-NEXT: lsls r0, r0, #3 +; CHECK-NEXT: add sp, #16 +; +; ENABLE-NEXT: pop {[[TMP]], lr} +; +; Duplicated epilogue. 
+; DISABLE-NEXT: pop {[[TMP]], pc} +; +; CHECK: [[ELSE_LABEL]]: @ %if.else +; Shift second argument by one and store into returned register. +; CHECK: lsls r0, r1, #1 +; +; Epilogue code. +; ENABLE-NEXT: bx lr +; +; DISABLE-NEXT: add sp, #16 +; DISABLE-NEXT: pop {[[TMP]], pc} +define i32 @callVariadicFunc(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) + %shl = shl i32 %call, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] + ret i32 %sum.0 +} + +declare i32 @someVariadicFunc(i32, ...) + +; Make sure we do not insert unreachable code after a noreturn function. +; Although it is not incorrect to insert such code, it is useless +; and it hurts the binary size. +; +; CHECK-LABEL: noreturn: +; DISABLE: push +; +; CHECK: movs [[TMP:r[0-9]+]], #255 +; CHECK-NEXT: tst r0, [[TMP]] +; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]] +; +; CHECK: movs r0, #42 +; +; ENABLE-NEXT: bx lr +; +; DISABLE-NEXT: pop +;; +; CHECK: [[ABORT]]: @ %if.abort +; +; ENABLE: push +; +; CHECK: bl +; ENABLE-NOT: pop +define i32 @noreturn(i8 signext %bad_thing) { +entry: + %tobool = icmp eq i8 %bad_thing, 0 + br i1 %tobool, label %if.end, label %if.abort + +if.abort: + %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() + tail call void @abort() #0 + unreachable + +if.end: + ret i32 42 +} + +declare void @abort() #0 + +attributes #0 = { noreturn nounwind }