From 63602ed8769afc21e7085d95263475c6669deaaf Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 26 Sep 2011 18:57:22 +0000 Subject: [PATCH] PTX: Fix detection of stack load/store vs. global load/store, as well as fix the printing of local offsets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXAsmPrinter.cpp | 11 +++- lib/Target/PTX/PTXISelDAGToDAG.cpp | 85 ++++++++++++++++++++--------- lib/Target/PTX/PTXInstrLoadStore.td | 27 ++------- test/CodeGen/PTX/stack-object.ll | 4 +- 4 files changed, 77 insertions(+), 50 deletions(-) diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 260ed231b1d..c09a7626782 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -299,10 +299,12 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { if (FrameInfo->getObjectSize(i) > 0) { std::string def = "\t.local .align "; def += utostr(FrameInfo->getObjectAlignment(i)); - def += " .b"; - def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits + def += " .b8"; def += " __local"; def += utostr(i); + def += "["; + def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits + def += "]"; def += ";"; OutStreamer.EmitRawText(Twine(def)); } @@ -465,6 +467,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier) { OS << "__local" << MI->getOperand(opNum).getImm(); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() != 0){ + OS << "+"; + printOperand(MI, opNum+1, OS); + } } void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index 383ba44f3fe..d99d49075d8 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -213,14 +213,54 @@ bool 
PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { // Match memory operand of the form [reg], [imm+reg], and [reg+imm] bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { + // FrameIndex addresses are handled separately + //errs() << "SelectADDRri: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + //errs() << "Failure\n"; + return false; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Success\n"; + return true; + } + + /*if (Addr.getNumOperands() == 1) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + errs() << "Success\n"; + return true; + }*/ + + //errs() << "SelectADDRri fails on: "; + //Addr.getNode()->dumpr(); + + if (isImm(Addr)) { + //errs() << "Failure\n"; + return false; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + //errs() << "Success\n"; + return true; + + /*if (Addr.getOpcode() != ISD::ADD) { // let SelectADDRii handle the [imm] case if (isImm(Addr)) return false; // it is [reg] assert(Addr.getValueType().isSimple() && "Type must be simple"); - Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); @@ -242,7 +282,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, } // neither [reg+imm] nor [imm+reg] - return false; + return false;*/ } // Match memory operand of the form [imm+imm] and [imm] @@ -269,35 +309,30 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, // Match memory operand of the form [reg], [imm+reg], and [reg+imm] bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { - // let 
SelectADDRii handle the [imm] case - if (isImm(Addr)) - return false; - // it is [reg] - - assert(Addr.getValueType().isSimple() && "Type must be simple"); - + //errs() << "SelectADDRlocal: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - + //errs() << "Success\n"; return true; } - if (Addr.getNumOperands() < 2) - return false; - - // let SelectADDRii handle the [imm+imm] case - if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) - return false; - - // try [reg+imm] and [imm+reg] - for (int i = 0; i < 2; i ++) - if (SelectImm(Addr.getOperand(1-i), Offset)) { - Base = Addr.getOperand(i); - return true; + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (!isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Offset: "; + //Offset.getNode()->dumpr(); + //errs() << "Success\n"; + return true; + } - // neither [reg+imm] nor [imm+reg] + //errs() << "Failure\n"; return false; } diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td index 83aafd81ef6..bb84bb56969 100644 --- a/lib/Target/PTX/PTXInstrLoadStore.td +++ b/lib/Target/PTX/PTXInstrLoadStore.td @@ -24,9 +24,7 @@ def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ const Value *Src; const PointerType *PT; - const SDValue &MemOp = N->getOperand(1); - if ((MemOp.getOpcode() != ISD::FrameIndex) && - (Src = cast<LoadSDNode>(N)->getSrcValue()) && + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && (PT = dyn_cast<PointerType>(Src->getType()))) return PT->getAddressSpace() == PTX::GLOBAL; return false; @@ -41,11 +39,6 @@ def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return false; }]>; -def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const SDValue &MemOp 
= N->getOperand(1); - return MemOp.getOpcode() == ISD::FrameIndex; -}]>; - def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ const Value *Src; const PointerType *PT; @@ -59,20 +52,12 @@ def store_global : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ const Value *Src; const PointerType *PT; - const SDValue &MemOp = N->getOperand(2); - if ((MemOp.getOpcode() != ISD::FrameIndex) && - (Src = cast<StoreSDNode>(N)->getSrcValue()) && + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && (PT = dyn_cast<PointerType>(Src->getType()))) return PT->getAddressSpace() == PTX::GLOBAL; return false; }]>; -def store_local - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const SDValue &MemOp = N->getOperand(2); - return MemOp.getOpcode() == ISD::FrameIndex; -}]>; - def store_shared : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ const Value *Src; @@ -221,16 +206,16 @@ multiclass PTX_ST { def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (load_local ADDRlocal32:$a))]>; + [(set RC:$d, (load_global ADDRlocal32:$a))]>; def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (load_local ADDRlocal64:$a))]>; + [(set RC:$d, (load_global ADDRlocal64:$a))]>; def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), - [(store_local RC:$d, ADDRlocal32:$a)]>; + [(store_global RC:$d, ADDRlocal32:$a)]>; def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a), !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), - [(store_local RC:$d, ADDRlocal64:$a)]>; + [(store_global RC:$d, ADDRlocal64:$a)]>; } multiclass PTX_PARAM_LD_ST { diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll index aab7f51f1f7..65f8ee2300c 100644 --- a/test/CodeGen/PTX/stack-object.ll +++ b/test/CodeGen/PTX/stack-object.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s 
-march=ptx32 -mattr=sm20 | FileCheck %s define ptx_device float @stack1(float %a) { - ; CHECK: .local .align 4 .b32 __local0; + ; CHECK: .local .align 4 .b8 __local0[4]; %a.2 = alloca float, align 4 ; CHECK: st.local.f32 [__local0], %f0 store float %a, float* %a.2 @@ -10,7 +10,7 @@ define ptx_device float @stack1(float %a) { } define ptx_device float @stack1_align8(float %a) { - ; CHECK: .local .align 8 .b32 __local0; + ; CHECK: .local .align 8 .b8 __local0[4]; %a.2 = alloca float, align 8 ; CHECK: st.local.f32 [__local0], %f0 store float %a, float* %a.2 -- 2.34.1