From: Tom Stellard
Date: Thu, 8 Jan 2015 15:08:17 +0000 (+0000)
Subject: R600/SI: Remove SIISelLowering::legalizeOperands()
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9a6e4f08fe96257babf8b7d152b4768b31c3540a;p=oota-llvm.git

R600/SI: Remove SIISelLowering::legalizeOperands()

Its functionality has been replaced by calling
SIInstrInfo::legalizeOperands() from
SIISelLowering::AdjustInstrPostInstrSelection() and running the
SIFoldOperands and SIShrinkInstructions passes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225445 91177308-0d34-0410-b5e6-96231b3b80d8
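In outline, operand legalization now happens on MachineInstrs instead of
SelectionDAG nodes. A minimal sketch of the delegation follows; the hook
body is an illustration assuming the MachineInstr-level
SIInstrInfo::legalizeOperands(MI) entry point named in the message, not
the verbatim source:

    // Post-isel hook: hand the freshly selected MachineInstr to the
    // MachineInstr-level legalizer. The SIFoldOperands and
    // SIShrinkInstructions passes later fold immediates and shrink
    // e64 encodings, covering what the removed DAG-level code did.
    void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                         SDNode *Node) const {
      const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
          getTargetMachine().getSubtargetImpl()->getInstrInfo());
      TII->legalizeOperands(MI);
    }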
---

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index de2cdd85ff3..a0668f279a5 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -1690,12 +1690,6 @@ static bool isVSrc(unsigned RegClass) {
   }
 }
 
-/// \brief Test if RegClass is one of the SSrc classes
-static bool isSSrc(unsigned RegClass) {
-  return AMDGPU::SSrc_32RegClassID == RegClass ||
-         AMDGPU::SSrc_64RegClassID == RegClass;
-}
-
 /// \brief Analyze the possible immediate value Op
 ///
 /// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
@@ -1728,44 +1722,6 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
   return -1;
 }
 
-/// \brief Try to fold an immediate directly into an instruction
-bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
-                               bool &ScalarSlotUsed) const {
-
-  MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-      getTargetMachine().getSubtargetImpl()->getInstrInfo());
-  if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
-    return false;
-
-  const SDValue &Op = Mov->getOperand(0);
-  int32_t Value = analyzeImmediate(Op.getNode());
-  if (Value == -1) {
-    // Not an immediate at all
-    return false;
-
-  } else if (Value == 0) {
-    // Inline immediates can always be folded
-    Operand = Op;
-    return true;
-
-  } else if (Value == Immediate) {
-    // Already folded literal immediate
-    Operand = Op;
-    return true;
-
-  } else if (!ScalarSlotUsed && !Immediate) {
-    // Fold this literal immediate
-    ScalarSlotUsed = true;
-    Immediate = Value;
-    Operand = Op;
-    return true;
-
-  }
-
-  return false;
-}
-
 const TargetRegisterClass *SITargetLowering::getRegClassForNode(
     SelectionDAG &DAG, const SDValue &Op) const {
   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
@@ -1829,133 +1785,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
   return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
 }
 
-/// \returns true if \p Node's operands are different from the SDValue list
-/// \p Ops
-static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
-  for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
-    if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-/// TODO: This needs to be removed. Its current primary purpose is to fold
-/// immediates into operands when legal. The legalization parts are redundant
-/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
-SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
-                                           SelectionDAG &DAG) const {
-  // Original encoding (either e32 or e64)
-  int Opcode = Node->getMachineOpcode();
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-      getTargetMachine().getSubtargetImpl()->getInstrInfo());
-  const MCInstrDesc *Desc = &TII->get(Opcode);
-
-  unsigned NumDefs = Desc->getNumDefs();
-  unsigned NumOps = Desc->getNumOperands();
-
-  // Commuted opcode if available
-  int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
-  const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
-
-  assert(!DescRev || DescRev->getNumDefs() == NumDefs);
-  assert(!DescRev || DescRev->getNumOperands() == NumOps);
-
-  int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
-  bool HaveVSrc = false, HaveSSrc = false;
-
-  // First figure out what we already have in this instruction.
-  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
-       i != e && Op < NumOps; ++i, ++Op) {
-
-    unsigned RegClass = Desc->OpInfo[Op].RegClass;
-    if (isVSrc(RegClass))
-      HaveVSrc = true;
-    else if (isSSrc(RegClass))
-      HaveSSrc = true;
-    else
-      continue;
-
-    int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
-    if (Imm != -1 && Imm != 0) {
-      // Literal immediate
-      Immediate = Imm;
-    }
-  }
-
-  // If we have neither VSrc nor SSrc, it makes no sense to continue.
-  if (!HaveVSrc && !HaveSSrc)
-    return Node;
-
-  // No scalar allowed when we have both VSrc and SSrc
-  bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
-
-  // If this instruction has an implicit use of VCC, then it can't use the
-  // constant bus.
-  for (unsigned i = 0, e = Desc->getNumImplicitUses(); i != e; ++i) {
-    if (Desc->ImplicitUses[i] == AMDGPU::VCC) {
-      ScalarSlotUsed = true;
-      break;
-    }
-  }
-
-  // Second go over the operands and try to fold them
-  std::vector<SDValue> Ops;
-  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
-       i != e && Op < NumOps; ++i, ++Op) {
-
-    const SDValue &Operand = Node->getOperand(i);
-    Ops.push_back(Operand);
-
-    // Already folded immediate?
-    if (isa<ConstantSDNode>(Operand.getNode()) ||
-        isa<ConstantFPSDNode>(Operand.getNode()))
-      continue;
-
-    // Is this a VSrc or SSrc operand?
-    unsigned RegClass = Desc->OpInfo[Op].RegClass;
-    if (isVSrc(RegClass) || isSSrc(RegClass)) {
-      // Try to fold the immediates. If this ends up with multiple constant bus
-      // uses, it will be legalized later.
-      foldImm(Ops[i], Immediate, ScalarSlotUsed);
-      continue;
-    }
-
-    if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
-
-      unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
-      assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
-
-      // Test if it makes sense to swap operands
-      if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
-          (!fitsRegClass(DAG, Ops[1], RegClass) &&
-           fitsRegClass(DAG, Ops[1], OtherRegClass))) {
-
-        // Swap commutable operands
-        std::swap(Ops[0], Ops[1]);
-
-        Desc = DescRev;
-        DescRev = nullptr;
-        continue;
-      }
-    }
-  }
-
-  // Add optional chain and glue
-  for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
-    Ops.push_back(Node->getOperand(i));
-
-  // Nodes that have a glue result are not CSE'd by getMachineNode(), so in
-  // this case a brand new node is always created, even if the operands
-  // are the same as before. So, manually check if anything has been changed.
-  if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
-    return Node;
-  }
-
-  // Create a completely new instruction
-  return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
-}
-
 /// \brief Helper function for adjustWritemask
 static unsigned SubIdx2Lane(unsigned Idx) {
   switch (Idx) {
@@ -2084,8 +1913,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
     legalizeTargetIndependentNode(Node, DAG);
     return Node;
   }
-
-  return legalizeOperands(Node, DAG);
+  return Node;
 }
 
 /// \brief Assign the register class depending on the number of
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 8b41245a15c..dc218202b12 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -42,14 +42,11 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
-  bool foldImm(SDValue &Operand, int32_t &Immediate,
-               bool &ScalarSlotUsed) const;
   const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
                                                 const SDValue &Op) const;
   bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                     unsigned RegClass) const;
-  SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
   void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
   MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index ef912c3b78e..ca3350dd7f4 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -48,7 +48,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
 ; R600: -KC0[2].Z
 
 ; XXX: We could use v_add_f32_e64 with the negate bit here instead.
-; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
+; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
 define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fsub = fsub float 0.0, %bc
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index 416462b62bc..6e4fa3cc60c 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -112,7 +112,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) {
 
 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
 ; CHECK-NEXT: buffer_store_dword [[REG]]
 define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0.0
@@ -304,7 +304,7 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
 
 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.0, [[VAL]]
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
 ; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
 define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0.0
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
index c430c5a0dcc..88a8145dcd6 100644
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -87,6 +87,27 @@ entry:
   store i32 %tmp1, i32 addrspace(1)* %out
   ret void
 }
+; CHECK-LABEL: {{^}}vector_imm:
+; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+
+define void @vector_imm(<4 x i32> addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = add i32 %tmp0, 1
+  %tmp2 = add i32 %tmp0, 2
+  %tmp3 = add i32 %tmp0, 3
+  %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+  %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+  %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+  %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+  %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+  ret void
+}
 declare i32 @llvm.r600.read.tidig.x() #0
 
 attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index 0530cbd9dc1..c6265a451c5 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp oeq float %p, %p
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index c13c7dcb19a..f2113096e3a 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
-; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp une float %p, %p
diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll
index abce1777aad..77844a6aa38 100644
--- a/test/CodeGen/R600/sint_to_fp.f64.ll
+++ b/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -45,9 +45,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
 
 ; SI-LABEL: @v_sint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
 ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index 06b8df4c521..dd541bb36a2 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -42,7 +42,7 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
 
 ; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index f34683395d5..09e987dd14d 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
-; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
 ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index adb365315a0..1c5e487724b 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -60,7 +60,7 @@ entry:
 
 ; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index b444b4ec08e..6ed35f2cbd3 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -39,7 +39,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
 ; FUNC-LABEL: {{^}}xor_i1:
 ; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
-; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0.0
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
 ; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
 ; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
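
For reference, the folding rule that the removed foldImm implemented, and
that the SIFoldOperands pass now applies at the MachineInstr level, reduces
to a single-literal-slot policy: inline immediates always fold, and at most
one distinct 32-bit literal may occupy an instruction's literal slot. The
following is a simplified standalone model with hypothetical names, not
LLVM code:

    // Model of the removed foldImm decision. Inline immediates always
    // fold; a literal folds only if the literal slot is free or already
    // holds the same value.
    #include <cassert>
    #include <cstdint>

    enum class ImmKind { NotImm, Inline, Literal };

    struct Imm {
      ImmKind Kind;
      int32_t Value; // meaningful only for ImmKind::Literal
    };

    bool tryFoldImm(Imm Op, int32_t &Literal, bool &SlotUsed) {
      switch (Op.Kind) {
      case ImmKind::NotImm:
        return false;                 // not an immediate at all
      case ImmKind::Inline:
        return true;                  // inline immediates always fold
      case ImmKind::Literal:
        if (SlotUsed)
          return Literal == Op.Value; // only the same literal again
        SlotUsed = true;              // claim the single literal slot
        Literal = Op.Value;
        return true;
      }
      return false;
    }

    int main() {
      int32_t Literal = 0;
      bool SlotUsed = false;
      assert(tryFoldImm({ImmKind::Literal, 100}, Literal, SlotUsed));
      assert(tryFoldImm({ImmKind::Literal, 100}, Literal, SlotUsed));
      assert(!tryFoldImm({ImmKind::Literal, 7}, Literal, SlotUsed));
      assert(tryFoldImm({ImmKind::Inline, 0}, Literal, SlotUsed));
      return 0;
    }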