From aa4e6afc9be330770e0b5d41e79aa26c3115bcca Mon Sep 17 00:00:00 2001
From: Stuart Hastings
Date: Thu, 26 May 2011 02:44:56 +0000
Subject: [PATCH] Correctly handle a one-word struct passed byval on x86_64.
 rdar://problem/6920088

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132105 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CallingConvLower.cpp            |  9 ++-
 lib/Target/X86/X86ISelLowering.cpp          | 85 +++++++++++++++++----
 lib/Target/X86/X86ISelLowering.h            |  2 +
 test/CodeGen/X86/2011-05-18-byval-in-reg.ll | 35 +++++++++
 4 files changed, 116 insertions(+), 15 deletions(-)
 create mode 100644 test/CodeGen/X86/2011-05-18-byval-in-reg.ll

diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index bfb6ba10234..d9cadf090f5 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -48,8 +48,13 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
   if (MinAlign > (int)Align)
     Align = MinAlign;
   TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this), Size);
-  unsigned Offset = AllocateStack(Size, Align);
-  addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  if (Size) {
+    unsigned Offset = AllocateStack(Size, Align);
+    addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  } else {
+    addLoc(CCValAssign::getReg(ValNo, ValVT, getFirstByValReg(), LocVT,
+                               LocInfo));
+  }
 }
 
 /// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c8c3f59c58e..fd7d1f730d4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2091,28 +2091,54 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     }
 
     if (VA.isRegLoc()) {
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-      if (isVarArg && IsWin64) {
-        // Win64 ABI requires argument XMM reg to be copied to the corresponding
-        // shadow reg if callee is a varargs function.
-        unsigned ShadowReg = 0;
-        switch (VA.getLocReg()) {
-        case X86::XMM0: ShadowReg = X86::RCX; break;
-        case X86::XMM1: ShadowReg = X86::RDX; break;
-        case X86::XMM2: ShadowReg = X86::R8; break;
-        case X86::XMM3: ShadowReg = X86::R9; break;
+      if (isByVal) {
+        if (CCInfo.isFirstByValRegValid()) {
+          EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+          unsigned reg = CCInfo.getFirstByValReg();
+          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, Arg,
+                                     MachinePointerInfo(),
+                                     false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(reg, Load));
+          if (Flags.getByValSize() > 8) {
+            SDValue Const8 = DAG.getConstant(8, PtrVT);
+            SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const8);
+            SDValue Load2 = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+                                        MachinePointerInfo(),
+                                        false, false, 0);
+            MemOpChains.push_back(Load2.getValue(1));
+            RegsToPass.push_back(std::make_pair(reg+1, Load2));
+          }
+          CCInfo.clearFirstByValReg();
+        }
+      } else {
+        // Usual case: pass the argument in its assigned register.
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+        if (isVarArg && IsWin64) {
+          // Win64 ABI requires argument XMM reg to be copied to the corresponding
+          // shadow reg if callee is a varargs function.
+          unsigned ShadowReg = 0;
+          switch (VA.getLocReg()) {
+          case X86::XMM0: ShadowReg = X86::RCX; break;
+          case X86::XMM1: ShadowReg = X86::RDX; break;
+          case X86::XMM2: ShadowReg = X86::R8; break;
+          case X86::XMM3: ShadowReg = X86::R9; break;
+          }
+          if (ShadowReg)
+            RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
         }
-        if (ShadowReg)
-          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
       }
     } else if (!IsSibcall && (!isTailCall || isByVal)) {
+      if (isByVal) { // In memory.
+        // Byval arguments with no register are copied to the stack below.
+      }
       assert(VA.isMemLoc());
       if (StackPtr.getNode() == 0)
         StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA,
                                              Flags));
     }
-  }
+  } // end for (all register/memloc assignments)
 
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -2438,6 +2464,39 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
   return Offset;
 }
 
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack.  Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to ensure
+/// this.
+void
+llvm::X86TargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+  if (!Subtarget->is64Bit())
+    return;
+
+  if (size == 0 || size > 16)
+    return;
+
+  int RegsRequired = (size > 8) ? 2 : 1;
+
+  static const unsigned GPR64ArgRegs64Bit[] = {
+    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+  };
+  unsigned NextRegToAlloc = State->getFirstUnallocated(GPR64ArgRegs64Bit, 6);
+
+  // Not enough registers left; leave the byval argument on the stack.
+  if (NextRegToAlloc + RegsRequired > 6)
+    return;
+
+  size = 0; // Tell the caller not to allocate stack space.
+
+  unsigned reg = State->AllocateReg(GPR64ArgRegs64Bit, 6);
+  State->setFirstByValReg(reg);
+
+  if (RegsRequired == 2) {
+    State->AllocateReg(GPR64ArgRegs64Bit, 6); // Reserve the second GPR too.
+  }
+}
+
 /// MatchingStackOffset - Return true if the given stack call argument is
 /// already available in the same position (relatively) of the caller's
 /// incoming argument stack.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ca84a990450..be3559c0314 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -816,6 +816,8 @@ namespace llvm { // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; + void HandleByVal(CCState *, unsigned &) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/test/CodeGen/X86/2011-05-18-byval-in-reg.ll b/test/CodeGen/X86/2011-05-18-byval-in-reg.ll new file mode 100644 index 00000000000..18f0415ce43 --- /dev/null +++ b/test/CodeGen/X86/2011-05-18-byval-in-reg.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s | FileCheck %s +; rdar://problem/6920088 +;target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "x86_64-apple-darwin9.0" +@"\01LC" = internal constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1] +@"\01LC1" = internal constant [2 x i8] c"b\00" ; <[2 x i8]*> [#uses=1] +@"\01LC2" = internal constant [2 x i8] c"c\00" ; <[2 x i8]*> [#uses=1] +@"\01LC3" = internal constant [2 x i8] c"d\00" ; <[2 x i8]*> [#uses=1] +@"\01LC4" = internal constant [2 x i8] c"e\00" ; <[2 x i8]*> [#uses=1] +@"\01LC5" = internal constant [2 x i8] c"f\00" ; <[2 x i8]*> [#uses=1] +@"\01LC6" = internal constant [2 x i8] c"g\00" ; <[2 x i8]*> [#uses=1] +@"\01LC7" = internal constant [4 x i8] c"%s\0A\00" ; <[4 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %tmp = alloca i8* ; [#uses=2] + %tmp1 = alloca i8* ; [#uses=2] + %tmp2 = alloca i8* ; [#uses=2] +; CHECK: leaq LC4(%rip), [[AREG:%[a-z]+]] +; CHECK-NEXT: movq [[AREG]], [[STKOFF:[0-9]+]](%rsp) + store i8* getelementptr ([2 x i8]* @"\01LC4", i32 0, i32 0), i8** %tmp + store i8* getelementptr ([2 x i8]* @"\01LC5", i32 0, i32 0), i8** %tmp1 + store i8* getelementptr ([2 x i8]* @"\01LC6", i32 0, i32 0), i8** %tmp2 +; The LC4 struct should be passed in %r9: +; CHECK: movq [[STKOFF]](%rsp), %r9 + call void (i8**, ...)* @generate_password(i8** null, + i8* getelementptr ([2 x i8]* @"\01LC", i32 0, i32 0), + i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0), + i8* getelementptr ([2 x i8]* @"\01LC2", i32 0, i32 0), + i8* getelementptr ([2 x i8]* @"\01LC3", i32 0, i32 0), + i8** byval %tmp, i8** byval %tmp1, i8** byval %tmp2) + ret i32 0 +} + +declare void @generate_password(i8** %pw, ...) nounwind -- 2.34.1
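
For context, here is a minimal IR sketch of the case this patch targets. It is
illustrative only: it is not part of the commit or its test suite, and the
struct and function names are made up. A byval aggregate of at most one word,
which after this change should be loaded and passed in a GPR on x86_64 rather
than copied into the caller's outgoing stack area.

; Hypothetical example (not from the patch): an 8-byte struct passed byval.
%struct.S = type { i64 }

define void @caller(%struct.S* %s) nounwind {
entry:
  ; With X86TargetLowering::HandleByVal reserving a GPR, the single word of
  ; %s is loaded and passed in a register instead of through a stack slot.
  call void @callee(i32 1, %struct.S* byval %s)
  ret void
}

declare void @callee(i32, %struct.S* byval)

Compiling IR like this with llc for an x86_64 triple should show the struct's
word moved into one of the integer argument registers instead of being stored
to the outgoing argument area, which is essentially what the CHECK lines in
the new test verify.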