From: Anton Korobeynikov Date: Mon, 3 Aug 2009 08:12:53 +0000 (+0000) Subject: Unbreak Win64 CC. Step one: honour register save area, fix some alignment and provide... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=cf6b739d3d4921dc9fc6908ec2009055c0927125;p=oota-llvm.git Unbreak Win64 CC. Step one: honour register save area, fix some alignment and provide a different set of call-clobberred registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77962 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e9fcbd5a489..25da8f8e41f 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -201,8 +201,8 @@ def CC_X86_Win64_C : CallingConv<[ [XMM0, XMM1, XMM2, XMM3]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 16-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>, + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, // Long doubles get stack slots whose size and alignment depends on the // subtarget. @@ -211,8 +211,8 @@ def CC_X86_Win64_C : CallingConv<[ // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - // __m64 vectors get 8-byte stack slots that are 16-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm index 8002f98765f..a11c5c3ce43 100644 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -15,52 +15,52 @@ extrn X86CompilationCallback2: PROC .code X86CompilationCallback proc + ; Save all int arg registers into register spill area. + mov [rsp+ 8], rcx + mov [rsp+16], rdx + mov [rsp+24], r8 + mov [rsp+32], r9 + push rbp - ; Save RSP + ; Save RSP. mov rbp, rsp - ; Save all int arg registers - push rcx - push rdx - push r8 - push r9 - ; Align stack on 16-byte boundary. and rsp, -16 - ; Save all XMM arg registers - sub rsp, 64 - movaps [rsp], xmm0 - movaps [rsp+16], xmm1 - movaps [rsp+32], xmm2 - movaps [rsp+48], xmm3 + ; Save all XMM arg registers. Also allocate reg spill area. + sub rsp, 96 + movaps [rsp +32], xmm0 + movaps [rsp+16+32], xmm1 + movaps [rsp+32+32], xmm2 + movaps [rsp+48+32], xmm3 ; JIT callee - ; Pass prev frame and return address + ; Pass prev frame and return address. mov rcx, rbp mov rdx, qword ptr [rbp+8] call X86CompilationCallback2 - ; Restore all XMM arg registers - movaps xmm3, [rsp+48] - movaps xmm2, [rsp+32] - movaps xmm1, [rsp+16] - movaps xmm0, [rsp] + ; Restore all XMM arg registers. + movaps xmm3, [rsp+48+32] + movaps xmm2, [rsp+32+32] + movaps xmm1, [rsp+16+32] + movaps xmm0, [rsp +32] - ; Restore RSP + ; Restore RSP. mov rsp, rbp - ; Restore all int arg registers - sub rsp, 32 - pop r9 - pop r8 - pop rdx - pop rcx - - ; Restore RBP + ; Restore RBP. pop rbp + + ; Restore all int arg registers. + mov r9, [rsp+32] + mov r8, [rsp+24] + mov rdx, [rsp+16] + mov rcx, [rsp+ 8] + ret X86CompilationCallback endp diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 36ea94b71eb..b151349688c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1228,7 +1228,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, MVT::v2i64, InFlag).getValue(1); Val = Chain.getValue(0); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, - Val, DAG.getConstant(0, MVT::i64)); + Val, DAG.getConstant(0, MVT::i64)); } else { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i64, InFlag).getValue(1); @@ -1628,8 +1628,9 @@ X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const CCValAssign &VA, SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags) { + const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); DebugLoc dl = TheCall->getDebugLoc(); - unsigned LocMemOffset = VA.getLocMemOffset(); + unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); if (Flags.isByVal()) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 5935745777e..c8712d4a48b 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -128,13 +128,37 @@ let isCall = 1 in def CALL64pcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call\t$dst", []>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotWin64]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR64:$dst)]>; + "call\t{*}$dst", [(X86call GR64:$dst)]>, + Requires<[NotWin64]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>; + "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + Requires<[NotWin64]>; } + // FIXME: We need to teach codegen about single list of call-clobbered registers. +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + Uses = [RSP] in { + def WINCALL64pcrel32 : I<0xE8, RawFrm, + (outs), (ins i64i32imm:$dst, variable_ops), + "call\t${dst:call}", [(X86call imm:$dst)]>, + Requires<[IsWin64]>; + def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call\t{*}$dst", + [(X86call GR64:$dst)]>, Requires<[IsWin64]>; + def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call\t{*}$dst", + [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -1495,9 +1519,14 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; + +def : Pat<(X86call (i64 tglobaladdr:$dst)), + (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), (CALL64pcrel32 tglobaladdr:$dst)>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 02ddf2583e1..7fb78f57b9e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -254,6 +254,8 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; def In64BitMode : Predicate<"Subtarget->is64Bit()">; +def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; +def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">; def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3fa53733c0d..5817bf0ac64 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -484,15 +484,18 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, int X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { - int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize; - uint64_t StackSize = MF.getFrameInfo()->getStackSize(); + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); + uint64_t StackSize = MFI->getStackSize(); if (needsStackRealignment(MF)) { if (FI < 0) // Skip the saved EBP Offset += SlotSize; else { - unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI); + unsigned Align = MFI->getObjectAlignment(FI); assert( (-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; @@ -622,14 +625,14 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, void X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { - MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - unsigned MaxAlign = std::max(FFI->getMaxAlignment(), - calculateMaxStackAlignment(FFI)); + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), + calculateMaxStackAlignment(MFI)); - FFI->setMaxAlignment(MaxAlign); + MFI->setMaxAlignment(MaxAlign); X86MachineFunctionInfo *X86FI = MF.getInfo(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -643,18 +646,18 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // ... // } // [EBP] - MF.getFrameInfo()-> - CreateFixedObject(-TailCallReturnAddrDelta, - (-1*SlotSize)+TailCallReturnAddrDelta); + MFI->CreateFixedObject(-TailCallReturnAddrDelta, + (-1*SlotSize)+TailCallReturnAddrDelta); } + if (hasFP(MF)) { assert((TailCallReturnAddrDelta <= 0) && "The Delta should always be zero or negative"); // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, - (int)SlotSize * -2+ - TailCallReturnAddrDelta); - assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + int FrameIdx = MFI->CreateFixedObject(SlotSize, + (int)SlotSize * -2+ + TailCallReturnAddrDelta); + assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; } @@ -887,6 +890,11 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); + } else if (Subtarget->isTargetWin64()) { + // We need to always allocate 32 bytes as register spill area. + // FIXME: we might reuse these 32 bytes for leaf functions. + StackSize += 32; + MFI->setStackSize(StackSize); } // Insert stack pointer adjustment for later moving of return addr. Only diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 43340f5124f..704c7d3a88c 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -64,7 +64,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), + Subtarget.getStackAlignment(), + (Subtarget.isTargetWin64() ? -40 : + (Subtarget.is64Bit() ? -8 : -4))), InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll index c628b8affdd..a96fcb2e835 100644 --- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll +++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp" +; RUN: llvm-as < %s | llc | grep "subq.*\\\$40, \\\%rsp" target triple = "x86_64-mingw64" define x86_fp80 @a(i64 %x) nounwind readnone { diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll index 33d797297be..7dfb65a23f9 100644 --- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll +++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll @@ -1,7 +1,7 @@ ; RUN: llvm-as < %s | llc -o %t1 -f -; RUN: grep "subq.*\\\$40, \\\%rsp" %t1 -; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1 -; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1 +; RUN: grep "subq.*\\\$72, \\\%rsp" %t1 +; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1 +; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1 target triple = "x86_64-mingw64" define i32 @a() nounwind {