From: Jakob Stoklund Olesen
Date: Sun, 7 Apr 2013 19:10:57 +0000 (+0000)
Subject: Implement LowerCall_64 for the SPARC v9 64-bit ABI.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=18fdb398ea94c7ddee40bec49f63491922c5b110;p=oota-llvm.git

Implement LowerCall_64 for the SPARC v9 64-bit ABI.

There is still no support for byval arguments (which I don't think are
needed) and varargs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178993 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index f987a15cee4..eb01c5ef318 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -594,6 +594,15 @@ LowerFormalArguments_64(SDValue Chain,
 SDValue
 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerCall_64(CLI, InVals);
+  return LowerCall_32(CLI, InVals);
+}
+
+// Lower a call for the 32-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG = CLI.DAG;
   DebugLoc &dl = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
@@ -887,6 +896,221 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
   return getDataLayout()->getTypeAllocSize(ElementTy);
 }
 
+// Lower a call for the 64-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+
+  // Get the size of the stack space needed for the outgoing arguments.
+  // The stack offset computed by CC_Sparc64 includes all arguments.
+  // We always allocate space for 6 arguments in the prolog.
+  unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()) - 6*8u;
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = RoundUpToAlignment(ArgsSize, 16);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory operations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
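+    // CC_Sparc64 assigns 64-bit locations to narrower integer arguments, so
+    // an i32 value arrives here tagged SExt, ZExt, or AExt and is widened to
+    // the i64 location type before it is copied or stored below.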
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      // The custom bit on an i32 argument indicates that it should be passed
+      // in the high bits of the register.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+        Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+        // The next value may go in the low bits of the same register.
+        // Handle both at once.
+        if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
+            ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
+          SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+                                   CLI.OutVals[i+1]);
+          Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
+          // Skip the next value, it's already done.
+          ++i;
+        }
+      }
+
+      // The argument registers are described in terms of the callee's register
+      // window, so translate I0-I7 -> O0-O7.
+      unsigned Reg = VA.getLocReg();
+      if (Reg >= SP::I0 && Reg <= SP::I7)
+        Reg = Reg - SP::I0 + SP::O0;
+      RegsToPass.push_back(std::make_pair(Reg, Arg));
+      continue;
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+    // The argument area starts at %fp+BIAS+128 in the callee frame,
+    // %sp+BIAS+128 in ours.
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
+                                           Subtarget->getStackPointerBias() +
+                                           128);
+    PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL,
+                             RegsToPass[i].first, RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call
+  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
+  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+  // Build the operands for the call instruction itself.
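+  // The operand list is the incoming chain, the callee address, one register
+  // reference per outgoing argument register, and finally the glue produced
+  // by the CopyToReg sequence above.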
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                             DAG.getIntPtrConstant(0, true), InGlue);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments_64.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+  RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = VA.getLocReg();
+
+    // Remap I0-I7 -> O0-O7.
+    if (Reg >= SP::I0 && Reg <= SP::I7)
+      Reg = Reg - SP::I0 + SP::O0;
+
+    // When returning 'inreg { i32, i32 }', two consecutive i32 arguments can
+    // reside in the same register in the high and low bits. Reuse the
+    // previous CopyFromReg node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, MVT::i32));
+
+    // The callee promoted the return value, so insert an Assert?ext SDNode so
+    // we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
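+    // isExtInLoc() is true when the location was produced by extending a
+    // narrower value, e.g. an i32 result promoted to the 64-bit register.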
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
 //                      TargetLowering Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 189a3882d3c..8a50f6890a0 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -95,6 +95,10 @@ namespace llvm {
     virtual SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
                 SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerReturn(SDValue Chain,
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index d447ec76d84..10d8ff7c9ae 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler | FileCheck %s
 
 ; CHECK: intarg
 ; CHECK: stb %i0, [%i4]
@@ -17,7 +17,7 @@ define void @intarg(i8 %a0,    ; %i0
                     i32 %a3,   ; %i3
                     i8* %a4,   ; %i4
                     i32 %a5,   ; %i5
-                    i32 %a6,   ; [%fp+BIAS+176]
+                    i32 signext %a6,   ; [%fp+BIAS+176]
                     i8* %a7) { ; [%fp+BIAS+184]
   store i8 %a0, i8* %a4
   store i8 %a1, i8* %a4
@@ -33,6 +33,18 @@ define void @intarg(i8 %a0,    ; %i0
   ret void
 }
 
+; CHECK: call_intarg
+; Sign-extend and store the full 64 bits.
+; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%sp+2223]
+; Use %o0-%o5 for outgoing arguments.
+; CHECK: or %g0, 5, %o5
+; CHECK: call intarg
+define void @call_intarg(i32 %i0, i8* %i1) {
+  call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+  ret void
+}
+
 ; CHECK: floatarg
 ; CHECK: fstod %f1,
 ; CHECK: faddd %f2,
@@ -57,7 +69,7 @@ define double @floatarg(float %a0,    ; %f1
                         float %a14,   ; %f29
                         float %a15,   ; %f31
                         float %a16,   ; [%fp+BIAS+256] (using 8 bytes)
-                        float %a17) { ; [%fp+BIAS+264] (using 8 bytes)
+                        double %a17) { ; [%fp+BIAS+264] (using 8 bytes)
   %d0 = fpext float %a0 to double
   %s1 = fadd double %a1, %d0
   %s2 = fadd double %a2, %s1
@@ -68,6 +80,23 @@ define double @floatarg(float %a0,    ; %f1
   ret double %s17
 }
 
+; CHECK: call_floatarg
+; Store 4 bytes, right-aligned in slot.
+; CHECK: st %f1, [%sp+2307]
+; Store 8 bytes in full slot.
+; CHECK: std %f2, [%sp+2311]
+; CHECK: fmovd %f2, %f4
+; CHECK: call floatarg
+define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
+  %r = call double @floatarg(float %f5, double %d2, double %d2, double %d2,
+                             float %f5, float %f5, float %f5, float %f5,
+                             float %f5, float %f5, float %f5, float %f5,
+                             float %f5, float %f5, float %f5, float %f5,
+                             float %f1, double %d2)
+  store double %r, double* %p
+  ret void
+}
+
 ; CHECK: mixedarg
 ; CHECK: fstod %f3
 ; CHECK: faddd %f6
@@ -92,6 +121,26 @@ define void @mixedarg(i8 %a0,       ; %i0
   ret void
 }
 
+; CHECK: call_mixedarg
+; CHECK: stx %i2, [%sp+2247]
+; CHECK: stx %i0, [%sp+2223]
+; CHECK: fmovd %f2, %f6
+; CHECK: fmovd %f2, %f16
+; CHECK: call mixedarg
+define void @call_mixedarg(i64 %i0, double %f2, i16* %i2) {
+  call void @mixedarg(i8 undef,
+                      float undef,
+                      i16 undef,
+                      double %f2,
+                      i13 undef,
+                      float undef,
+                      i64 %i0,
+                      double* undef,
+                      double %f2,
+                      i16* %i2)
+  ret void
+}
+
 ; The inreg attribute is used to indicate 32-bit sized struct elements that
 ; share an 8-byte slot.
 ; CHECK: inreg_fi
@@ -105,6 +154,15 @@ define i32 @inreg_fi(i32 inreg %a0,   ; high bits of %i0
   ret i32 %rv
 }
 
+; CHECK: call_inreg_fi
+; CHECK: sllx %i1, 32, %o0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_fi
+define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
+  %x = call i32 @inreg_fi(i32 %i1, float %f5)
+  ret void
+}
+
 ; CHECK: inreg_ff
 ; CHECK: fsubs %f0, %f1, %f1
 define float @inreg_ff(float inreg %a0,   ; %f0
@@ -113,6 +171,15 @@ define float @inreg_ff(float inreg %a0,   ; %f0
   ret float %rv
 }
 
+; CHECK: call_inreg_ff
+; CHECK: fmovs %f3, %f0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_ff
+define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
+  %x = call float @inreg_ff(float %f3, float %f5)
+  ret void
+}
+
 ; CHECK: inreg_if
 ; CHECK: fstoi %f0
 ; CHECK: sub %i0
@@ -123,6 +190,15 @@ define i32 @inreg_if(float inreg %a0,   ; %f0
   ret i32 %rv
 }
 
+; CHECK: call_inreg_if
+; CHECK: fmovs %f3, %f0
+; CHECK: or %g0, %i2, %o0
+; CHECK: call inreg_if
+define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
+  %x = call i32 @inreg_if(float %f3, i32 %i2)
+  ret void
+}
+
 ; The frontend shouldn't do this. Just pass i64 instead.
 ; CHECK: inreg_ii
 ; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
@@ -133,6 +209,16 @@ define i32 @inreg_ii(i32 inreg %a0,   ; high bits of %i0
   ret i32 %rv
 }
 
+; CHECK: call_inreg_ii
+; CHECK: srl %i2, 0, [[R2:%[gilo][0-7]]]
+; CHECK: sllx %i1, 32, [[R1:%[gilo][0-7]]]
+; CHECK: or [[R1]], [[R2]], %o0
+; CHECK: call inreg_ii
+define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
+  %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
+  ret void
+}
+
 ; Structs up to 32 bytes in size can be returned in registers.
 ; CHECK: ret_i64_pair
 ; CHECK: ldx [%i2], %i0
@@ -146,6 +232,20 @@ define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
   ret { i64, i64 } %rv2
 }
 
+; CHECK: call_ret_i64_pair
+; CHECK: call ret_i64_pair
+; CHECK: stx %o0, [%i0]
+; CHECK: stx %o1, [%i0]
+define void @call_ret_i64_pair(i64* %i0) {
+  %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
+                                        i64* undef, i64* undef)
+  %e0 = extractvalue { i64, i64 } %rv, 0
+  store i64 %e0, i64* %i0
+  %e1 = extractvalue { i64, i64 } %rv, 1
+  store i64 %e1, i64* %i0
+  ret void
+}
+
 ; This is not a C struct, each member uses 8 bytes.
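 ; With one 8-byte slot per member, the i32 comes back in %o0 and the float in
 ; %f3, as the CHECK lines for call_ret_i32_float_pair below verify.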
 ; CHECK: ret_i32_float_pair
 ; CHECK: ld [%i2], %i0
@@ -160,6 +260,20 @@ define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
   ret { i32, float } %rv2
 }
 
+; CHECK: call_ret_i32_float_pair
+; CHECK: call ret_i32_float_pair
+; CHECK: st %o0, [%i0]
+; CHECK: st %f3, [%i1]
+define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_pair(i32 undef, i32 undef,
+                                                i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
 ; This is a C struct, each member uses 4 bytes.
 ; CHECK: ret_i32_float_packed
 ; CHECK: ld [%i2], [[R:%[gilo][0-7]]]
@@ -175,6 +289,21 @@ define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
   ret { i32, float } %rv2
 }
 
+; CHECK: call_ret_i32_float_packed
+; CHECK: call ret_i32_float_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %f1, [%i1]
+define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_packed(i32 undef, i32 undef,
+                                                  i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
 ; The C frontend should use i64 to return { i32, i32 } structs, but verify that
 ; we don't miscompile this case where both struct elements are placed in %i0.
 ; CHECK: ret_i32_packed
@@ -192,6 +321,21 @@ define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
   ret { i32, i32 } %rv2
 }
 
+; CHECK: call_ret_i32_packed
+; CHECK: call ret_i32_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %o0, [%i1]
+define void @call_ret_i32_packed(i32* %i0, i32* %i1) {
+  %rv = call { i32, i32 } @ret_i32_packed(i32 undef, i32 undef,
+                                          i32* undef, i32* undef)
+  %e0 = extractvalue { i32, i32 } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, i32 } %rv, 1
+  store i32 %e1, i32* %i1
+  ret void
+}
+
 ; The return value must be sign-extended to 64 bits.
 ; CHECK: ret_sext
 ; CHECK: sra %i0, 0, %i0
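 ; The 'sra' by zero replicates bit 31 into the upper half of the register,
 ; producing the required 64-bit sign extension of the 32-bit return value.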