From 7e96f0f6fffbdebfdac238ae76cc3a791acfa23e Mon Sep 17 00:00:00 2001 From: Charles Davis Date: Tue, 25 Aug 2015 23:27:41 +0000 Subject: [PATCH] Make variable argument intrinsics behave correctly in a Win64 CC function. Summary: This change makes the variable argument intrinsics, `llvm.va_start` and `llvm.va_copy`, and the `va_arg` instruction behave as they do on Windows inside a `CallingConv::X86_64_Win64` function. It's needed for a Clang patch I have to add support for GCC's `__builtin_ms_va_list` constructs. Reviewers: nadav, asl, eugenis CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1622 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245990 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAG.h | 6 + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 52 +-------- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 51 +++++++++ lib/Target/X86/X86ISelLowering.cpp | 28 +++-- .../Instrumentation/MemorySanitizer.cpp | 4 + test/CodeGen/X86/x86-64-ms_abi-vararg.ll | 108 ++++++++++++++++++ 6 files changed, 191 insertions(+), 58 deletions(-) create mode 100644 test/CodeGen/X86/x86-64-ms_abi-vararg.ll diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 11249f05986..07cd637e17b 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -901,6 +901,12 @@ public: /// the target's desired shift amount type. SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); + /// Expand the specified \c ISD::VAARG node as the Legalize pass would. + SDValue expandVAArg(SDNode *Node); + + /// Expand the specified \c ISD::VACOPY node as the Legalize pass would. + SDValue expandVACopy(SDNode *Node); + /// *Mutate* the specified node in-place to have the /// specified operands. 
If the resultant node already exists in the DAG, /// this does not modify the specified node, instead it returns the node that diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 552f92bbd46..be08ca8c0b2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3099,57 +3099,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::VAARG: { - const Value *V = cast(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); - - SDValue VAListLoad = - DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); - SDValue VAList = VAListLoad; - - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - - VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, dl, - VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, dl, - VAList.getValueType())); - } - - // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( - VT.getTypeForEVT(*DAG.getContext())), - dl, VAList.getValueType())); - // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, - MachinePointerInfo(V), false, false, 0); - // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, false, 0)); + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; - } - case ISD::VACOPY: { - // This defaults 
to loading a pointer from the input and storing it to the - // output, returning the chain. - const Value *VD = cast(Node->getOperand(3))->getValue(); - const Value *VS = cast(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); - Results.push_back(Tmp1); + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); break; - } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0d73bc5e65d..82537eb8923 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1855,6 +1855,57 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { return getZExtOrTrunc(Op, SDLoc(Op), ShTy); } +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, 
VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast(Node->getOperand(3))->getValue(); + const Value *VS = cast(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); +} + /// CreateStackTemporary - Create a stack temporary, suitable for holding the /// specified value type. 
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0ade9654b29..0f8736f311e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -484,8 +484,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { - // TargetInfo::X86_64ABIBuiltinVaList + if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -15153,7 +15152,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); - if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { + if (!Subtarget->is64Bit() || + Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -15203,10 +15203,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); - assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && - "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); + + MachineFunction &MF = DAG.getMachineFunction(); + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + // The Win64 ABI uses char* instead of a structure. 
+ return DAG.expandVAArg(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -15234,8 +15237,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. assert(!Subtarget->useSoftFloat() && - !(DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat)) && + !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -15264,8 +15266,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - // X86-64 va_list is a struct { i32, i32, i8*, i8* }. + // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, + // where a va_list is still an i8*. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); + if (Subtarget->isCallingConvWin64( + DAG.getMachineFunction().getFunction()->getCallingConv())) + // Probably a Win64 va_copy. + return DAG.expandVACopy(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); @@ -20034,7 +20042,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( int64_t RegSaveFrameIndex = MI->getOperand(1).getImm(); int64_t VarArgsFPOffset = MI->getOperand(2).getImm(); - if (!Subtarget->isTargetWin64()) { + if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) { // If %al is 0, branch around the XMM save block. 
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index d296474abe1..eb2a5b3653c 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2863,6 +2863,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); @@ -2875,6 +2877,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); diff --git a/test/CodeGen/X86/x86-64-ms_abi-vararg.ll b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll new file mode 100644 index 00000000000..e3436521a5b --- /dev/null +++ b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnu | FileCheck %s + +; Verify that the var arg parameters which are passed in registers are stored +; in home stack slots allocated by the caller and that AP is correctly +; calculated. +define x86_64_win64cc void @average_va(i32 %count, ...) 
nounwind { +entry: +; CHECK: pushq +; CHECK: movq %r9, 40(%rsp) +; CHECK: movq %r8, 32(%rsp) +; CHECK: movq %rdx, 24(%rsp) +; CHECK: leaq 24(%rsp), %rax + + %ap = alloca i8*, align 8 ; <i8**> [#uses=1] + %ap.0 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap.0) + ret void +} + +declare void @llvm.va_start(i8*) nounwind +declare void @llvm.va_copy(i8*, i8*) nounwind +declare void @llvm.va_end(i8*) nounwind + +; CHECK-LABEL: f5: +; CHECK: pushq +; CHECK: leaq 56(%rsp), +define x86_64_win64cc i8** @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap.0) + ret i8** %ap +} + +; CHECK-LABEL: f4: +; CHECK: pushq +; CHECK: leaq 48(%rsp), +define x86_64_win64cc i8** @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap.0) + ret i8** %ap +} + +; CHECK-LABEL: f3: +; CHECK: pushq +; CHECK: leaq 40(%rsp), +define x86_64_win64cc i8** @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap.0) + ret i8** %ap +} + +; WinX86_64 uses char* for va_list. Verify that the correct number of bytes +; is copied using va_copy. + +; CHECK-LABEL: copy1: +; CHECK: leaq 32(%rsp), [[REG_copy1:%[a-z]+]] +; CHECK: movq [[REG_copy1]], 8(%rsp) +; CHECK: movq [[REG_copy1]], (%rsp) +; CHECK: ret +define x86_64_win64cc void @copy1(i64 %a0, ...) 
nounwind { +entry: + %ap = alloca i8*, align 8 + %cp = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + %cp.0 = bitcast i8** %cp to i8* + call void @llvm.va_start(i8* %ap.0) + call void @llvm.va_copy(i8* %cp.0, i8* %ap.0) + ret void +} + +; CHECK-LABEL: copy4: +; CHECK: leaq 56(%rsp), [[REG_copy4:%[a-z]+]] +; CHECK: movq [[REG_copy4]], 8(%rsp) +; CHECK: movq [[REG_copy4]], (%rsp) +; CHECK: ret +define x86_64_win64cc void @copy4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %cp = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + %cp.0 = bitcast i8** %cp to i8* + call void @llvm.va_start(i8* %ap.0) + call void @llvm.va_copy(i8* %cp.0, i8* %ap.0) + ret void +} + +; CHECK-LABEL: arg4: +; va_start: +; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]] +; CHECK: movq [[REG_arg4_1]], (%rsp) +; va_arg: +; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]] +; CHECK: movq [[REG_arg4_2]], (%rsp) +; CHECK: movl 48(%rsp), %eax +; CHECK: ret +define x86_64_win64cc i32 @arg4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind { +entry: + %ap = alloca i8*, align 8 + %ap.0 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap.0) + %tmp = va_arg i8** %ap, i32 + ret i32 %tmp +} -- 2.34.1