From bd9bd9e500364fef22520269ef95fe480ba0f708 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Sat, 28 Nov 2015 11:02:32 +0000 Subject: [PATCH] [Stack realignment] Handling of aligned allocas. This patch implements dynamic realignment of stack objects for targets with a non-realigned stack pointer. Behaviour in FunctionLoweringInfo is changed so that for a target that has StackRealignable set to false, over-aligned static allocas are considered to be variable-sized objects and are handled with DYNAMIC_STACKALLOC nodes. It would be good to group aligned allocas into a single big alloca as an optimization, but this is still to do. SystemZ benefits from this, due to its stack frame layout. New tests SystemZ/alloca-03.ll for aligned allocas, and SystemZ/alloca-04.ll for "no-realign-stack" attribute on functions. Review and help from Ulrich Weigand and Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254227 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineFrameInfo.h | 10 +++ .../SelectionDAG/FunctionLoweringInfo.cpp | 28 ++++--- lib/Target/SystemZ/SystemZFrameLowering.cpp | 3 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 32 ++++++- test/CodeGen/SystemZ/alloca-03.ll | 84 +++++++++++++++++++ test/CodeGen/SystemZ/alloca-04.ll | 14 ++++ 6 files changed, 156 insertions(+), 15 deletions(-) create mode 100644 test/CodeGen/SystemZ/alloca-03.ll create mode 100644 test/CodeGen/SystemZ/alloca-04.ll diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index f95cbdb49af..f5a688458d2 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -125,6 +125,16 @@ class MachineFrameInfo { unsigned StackAlignment; /// Can the stack be realigned. 
+ /// Targets that set this to false don't have the ability to overalign + /// their stack frame, and thus, overaligned allocas are all treated + /// as dynamic allocations and the target must handle them as part + /// of DYNAMIC_STACKALLOC lowering. + /// FIXME: There is room for improvement in this case, in terms of + /// grouping overaligned allocas into a "secondary stack frame" and + /// then only use a single alloca to allocate this frame and only a + /// single virtual register to access it. Currently, without such an + /// optimization, each such alloca gets its own dynamic + /// realignment. bool StackRealignable; /// The list of stack objects allocated. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8d42dcef140..ff0ccd415db 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = MF->getMMI(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); // Check whether the function can return without sret-demotion. SmallVector Outs; @@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (const AllocaInst *AI = dyn_cast(I)) { - // Static allocas can be folded into the initial stack frame adjustment. - if (AI->isStaticAlloca()) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. 
For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast(AI->getArraySize()); - Type *Ty = AI->getAllocatedType(); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } else { - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 9eeb0466446..e1b20d0536d 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -48,7 +48,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize, 8) { + -SystemZMC::CallFrameSize, 8, + false /* StackRealignable */) { // Create a mapping from register number to save slot offset. 
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index fa9ee6e2eeb..5959e90d2f6 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2739,17 +2739,37 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + bool RealignOpt = !DAG.getMachineFunction().getFunction()-> + hasFnAttribute("no-realign-stack"); + SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); SDLoc DL(Op); + // If the user has set the "no-realign-stack" function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? + dyn_cast(Align)->getZExtValue() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + SDValue NeededSpace = Size; // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + // Add extra space for alignment if needed. + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + // Get the new stack pointer value. - SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); // Copy the new stack pointer back. 
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); @@ -2760,6 +2780,16 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + // Dynamically realign if needed. + if (RequiredAlign > StackAlign) { + Result = + DAG.getNode(ISD::ADD, DL, MVT::i64, Result, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + Result = + DAG.getNode(ISD::AND, DL, MVT::i64, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); + } + SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } diff --git a/test/CodeGen/SystemZ/alloca-03.ll b/test/CodeGen/SystemZ/alloca-03.ll new file mode 100644 index 00000000000..ece1198ad62 --- /dev/null +++ b/test/CodeGen/SystemZ/alloca-03.ll @@ -0,0 +1,84 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Allocate 8 bytes, no need to align stack. +define void @f0() { +; CHECK-LABEL: f0: +; CHECK: aghi %r15, -168 +; CHECK-NOT: nil +; CHECK: mvghi 160(%r15), 10 +; CHECK: aghi %r15, 168 + %x = alloca i64 + store volatile i64 10, i64* %x + ret void +} + +; Allocate %len * 8, no need to align stack. +define void @f1(i64 %len) { +; CHECK-LABEL: f1: +; CHECK: sllg %r0, %r2, 3 +; CHECK: lgr %r1, %r15 +; CHECK: sgr %r1, %r0 +; CHECK-NOT: ngr +; CHECK: lgr %r15, %r1 +; CHECK: la %r1, 160(%r1) +; CHECK: mvghi 0(%r1), 10 + %x = alloca i64, i64 %len + store volatile i64 10, i64* %x + ret void +} + +; Static alloca, align 128. +define void @f2() { +; CHECK-LABEL: f2: +; CHECK: aghi %r1, -128 +; CHECK: lgr %r15, %r1 +; CHECK: la %r1, 280(%r1) +; CHECK: nill %r1, 65408 +; CHECK: mvghi 0(%r1), 10 + %x = alloca i64, i64 1, align 128 + store volatile i64 10, i64* %x, align 128 + ret void +} + +; Dynamic alloca, align 128. 
+define void @f3(i64 %len) { +; CHECK-LABEL: f3: +; CHECK: sllg %r1, %r2, 3 +; CHECK: la %r0, 120(%r1) +; CHECK: lgr %r1, %r15 +; CHECK: sgr %r1, %r0 +; CHECK: lgr %r15, %r1 +; CHECK: la %r1, 280(%r1) +; CHECK: nill %r1, 65408 +; CHECK: mvghi 0(%r1), 10 + %x = alloca i64, i64 %len, align 128 + store volatile i64 10, i64* %x, align 128 + ret void +} + +; Static alloca w/out alignment - part of frame. +define void @f4() { +; CHECK-LABEL: f4: +; CHECK: aghi %r15, -168 +; CHECK: mvhi 164(%r15), 10 +; CHECK: aghi %r15, 168 + %x = alloca i32 + store volatile i32 10, i32* %x + ret void +} + +; Static alloca of one i32, aligned by 128. +define void @f5() { +; CHECK-LABEL: f5: + +; CHECK: lgr %r1, %r15 +; CHECK: aghi %r1, -128 +; CHECK: lgr %r15, %r1 +; CHECK: la %r1, 280(%r1) +; CHECK: nill %r1, 65408 +; CHECK: mvhi 0(%r1), 10 + %x = alloca i32, i64 1, align 128 + store volatile i32 10, i32* %x + ret void +} + diff --git a/test/CodeGen/SystemZ/alloca-04.ll b/test/CodeGen/SystemZ/alloca-04.ll new file mode 100644 index 00000000000..c7ecf38247a --- /dev/null +++ b/test/CodeGen/SystemZ/alloca-04.ll @@ -0,0 +1,14 @@ +; Check the "no-realign-stack" function attribute. We should get a warning. + +; RUN: llc < %s -mtriple=s390x-linux-gnu -debug-only=codegen 2>&1 | \ +; RUN: FileCheck %s + + +define void @f6() "no-realign-stack" { + %x = alloca i64, i64 1, align 128 + store volatile i64 10, i64* %x, align 128 + ret void +} + +; CHECK: Warning: requested alignment 128 exceeds the stack alignment 8 +; CHECK-NOT: nill -- 2.34.1