From: Anton Korobeynikov Date: Tue, 17 Apr 2007 09:20:00 +0000 (+0000) Subject: Implemented correct stack probing on mingw/cygwin for dynamic alloca's. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=57fc00d5cf47343ba762493b8781ca0b14489c35;p=oota-llvm.git Implemented correct stack probing on mingw/cygwin for dynamic alloca's. Also, fixed static case in presence of eax live-in. This fixes PR331 PS: Why don't we still have push/pop instructions? :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36195 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 0f135e22793..74f405f78a4 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -430,10 +430,11 @@ namespace ISD { TRUNCSTORE, // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned - // to a specified boundary. The first operand is the token chain, the - // second is the number of bytes to allocate, and the third is the alignment - // boundary. The size is guaranteed to be a multiple of the stack - // alignment, and the alignment is guaranteed to be bigger than the stack + // to a specified boundary. This node always has two return values: a new + // stack pointer value and a chain. The first operand is the token chain, + // the second is the number of bytes to allocate, and the third is the + // alignment boundary. The size is guaranteed to be a multiple of the stack + // alignment, and the alignment is guaranteed to be bigger than the stack // alignment (if required) or 0 to get standard stack alignment.
DYNAMIC_STACKALLOC, diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 83704a34d1d..ac7251f8982 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -1281,8 +1281,8 @@ static void generateCompilerSpecificCode(std::ostream& Out) { // Alloca is hard to get, and we don't want to include stdlib.h here. Out << "/* get a declaration for alloca */\n" << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" - << "extern void *_alloca(unsigned long);\n" - << "#define alloca(x) _alloca(x)\n" + << "#define alloca(x) __builtin_alloca((x))\n" + << "#define _alloca(x) __builtin_alloca((x))\n" << "#elif defined(__APPLE__)\n" << "extern void *__builtin_alloca(unsigned long);\n" << "#define alloca(x) __builtin_alloca(x)\n" diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 664c7e06387..209b17a5c90 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -237,7 +237,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + if (Subtarget->isTargetCygMing()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); if (X86ScalarSSE) { // Set up the FP register classes. @@ -3401,6 +3404,36 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { } } +SDOperand X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, + SelectionDAG &DAG) { + // Get the inputs. + SDOperand Chain = Op.getOperand(0); + SDOperand Size = Op.getOperand(1); + // FIXME: Ensure alignment here + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + MVT::ValueType IntPtr = getPointerTy(); + MVT::ValueType SPTy = (Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); + const Type *IntPtrTy = getTargetData()->getIntPtrType(); + + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isInReg = true; // Should pass in EAX + Args.push_back(Entry); + std::pair<SDOperand, SDOperand> CallResult = + LowerCallTo(Chain, IntPtrTy, false, false, CallingConv::C, false, + DAG.getExternalSymbol("_alloca", IntPtr), Args, DAG); + + SDOperand SP = DAG.getCopyFromReg(CallResult.second, X86StackPtr, SPTy); + + std::vector<MVT::ValueType> Tys; + Tys.push_back(SPTy); + Tys.push_back(MVT::Other); + SDOperand Ops[2] = { SP, CallResult.second }; + return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2); +} + SDOperand X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); @@ -4002,6 +4035,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return SDOperand(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7d40e30b397..8b9c269dca4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -401,6 +401,7 @@ namespace llvm { SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG); SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG); SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8e4e7d7ecb4..bfdaff6b1a8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -477,6 +477,9 @@ def LEAVE :
I<0xC9, RawFrm, def POP32r : I<0x58, AddRegFrm, (ops GR32:$reg), "pop{l} $reg", []>, Imp<[ESP],[ESP]>; +def PUSH32r : I<0x50, AddRegFrm, + (ops GR32:$reg), "push{l} $reg", []>, Imp<[ESP],[ESP]>; + def MovePCtoStack : I<0, Pseudo, (ops piclabel:$label), "call $label", []>; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3737c0e79e7..cd2a0d4a616 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -1039,14 +1039,39 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // adjust stack pointer: ESP -= numbytes if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + // Check, whether EAX is livein for this function + bool isEAXAlive = false; + for (MachineFunction::livein_iterator II = MF.livein_begin(), + EE = MF.livein_end(); (II != EE) && !isEAXAlive; ++II) { + unsigned Reg = II->first; + isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL); + } + // Function prologue calls _alloca to probe the stack when allocating // more than 4k bytes in one go. Touching the stack at 4K increments is // necessary to ensure that the guard pages used by the OS virtual memory // manager are allocated in correct sequence. - MI = BuildMI(TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes); - MBB.insert(MBBI, MI); - MI = BuildMI(TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca"); - MBB.insert(MBBI, MI); + if (!isEAXAlive) { + MI = BuildMI(TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes); + MBB.insert(MBBI, MI); + MI = BuildMI(TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca"); + MBB.insert(MBBI, MI); + } else { + // Save EAX + MI = BuildMI(TII.get(X86::PUSH32r), X86::EAX); + MBB.insert(MBBI, MI); + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. 
+ MI = BuildMI(TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4); + MBB.insert(MBBI, MI); + MI = BuildMI(TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca"); + MBB.insert(MBBI, MI); + // Restore EAX + MI = addRegOffset(BuildMI(TII.get(X86::MOV32rm), X86::EAX), + StackPtr, NumBytes-4); + MBB.insert(MBBI, MI); + } } else { unsigned Opc = (NumBytes < 128) ? (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll new file mode 100644 index 00000000000..dd458835ddf --- /dev/null +++ b/test/CodeGen/X86/mingw-alloca.ll @@ -0,0 +1,27 @@ +; RUN: llvm-as < %s | llc -o %t -f +; RUN: grep __alloca %t | wc -l | grep 2 +; RUN: grep 8028 %t +; RUN: grep {pushl %eax} %t +; RUN: grep 8024 %t | wc -l | grep 2 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i386-mingw32" + +define void @foo1(i32 %N) { +entry: + %tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1] + call void @bar1( i32* %tmp14 ) + ret void +} + +declare void @bar1(i32*) + +define void @foo2(i32 inreg %N) { +entry: + %A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1] + %A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1] + call void @bar2( i32* %A2.sub, i32 %N ) + ret void +} + +declare void @bar2(i32*, i32)