return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}
-/// \brief Make sure that we don't exeed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass,
- bool &ScalarSlotUsed) const {
-
- if (!isVSrc(RegClass))
- return;
-
- // First map the operands register class to a destination class
- switch (RegClass) {
- case AMDGPU::VSrc_32RegClassID:
- case AMDGPU::VCSrc_32RegClassID:
- RegClass = AMDGPU::VReg_32RegClassID;
- break;
- case AMDGPU::VSrc_64RegClassID:
- case AMDGPU::VCSrc_64RegClassID:
- RegClass = AMDGPU::VReg_64RegClassID;
- break;
- default:
- llvm_unreachable("Unknown vsrc reg class");
- }
-
- // Nothing to do if they fit naturally
- if (fitsRegClass(DAG, Operand, RegClass))
- return;
-
- // If the scalar slot isn't used yet use it now
- if (!ScalarSlotUsed) {
- ScalarSlotUsed = true;
- return;
- }
-
- // This is a conservative aproach. It is possible that we can't determine the
- // correct register class and copy too often, but better safe than sorry.
-
- SDNode *Node;
- // We can't use COPY_TO_REGCLASS with FrameIndex arguments.
- if (isa<FrameIndexSDNode>(Operand) ||
- isa<GlobalAddressSDNode>(Operand)) {
- unsigned Opcode = Operand.getValueType() == MVT::i32 ?
- AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
- Operand);
- } else {
- SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
- Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
- Operand.getValueType(), Operand, RC);
- }
- Operand = SDValue(Node, 0);
-}
-
/// \returns true if \p Node's operands are different from the SDValue list
/// \p Ops
static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
return false;
}
-/// \brief Try to commute instructions and insert copies in order to satisfy the
-/// operand constraints.
+/// TODO: This needs to be removed. It's current primary purpose is to fold
+/// immediates into operands when legal. The legalization parts are redundant
+/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
SelectionDAG &DAG) const {
// Original encoding (either e32 or e64)
// Is this a VSrc or SSrc operand?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
if (isVSrc(RegClass) || isSSrc(RegClass)) {
- // Try to fold the immediates
- if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't work, make sure we don't hit the SReg limit.
- ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
- }
+ // Try to fold the immediates. If this ends up with multiple constant bus
+ // uses, it will be legalized later.
+ foldImm(Ops[i], Immediate, ScalarSlotUsed);
continue;
}
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ TII->legalizeOperands(MI);
+
if (TII->isMIMG(MI->getOpcode())) {
unsigned VReg = MI->getOperand(0).getReg();
unsigned Writemask = MI->getOperand(1).getImm();
const SDValue &Op) const;
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const;
- void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass, bool &ScalarSlotUsed) const;
SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
let TSFlags{10} = MUBUF;
let TSFlags{11} = MTBUF;
let TSFlags{12} = FLAT;
+
+ // Most instructions require adjustments after selection to satisfy
+ // operand requirements.
+ let hasPostISelHook = 1;
}
class Enc32 {
int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
- // First we need to consider the instruction's operand requirements before
- // legalizing. Some operands are required to be SGPRs, but we are still
- // bound by the constant bus requirement to only use one.
- //
- // If the operand's class is an SGPR, we can never move it.
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = VOP3Idx[i];
- if (Idx == -1)
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ if (MO.getReg() == AMDGPU::VCC) {
+ SGPRReg = AMDGPU::VCC;
break;
+ }
- if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
- SGPRReg = MI->getOperand(Idx).getReg();
+ if (MO.getReg() == AMDGPU::FLAT_SCR) {
+ SGPRReg = AMDGPU::FLAT_SCR;
break;
}
}
+
+ if (SGPRReg == AMDGPU::NoRegister) {
+ // First we need to consider the instruction's operand requirements before
+ // legalizing. Some operands are required to be SGPRs, but we are still
+ // bound by the constant bus requirement to only use one.
+ //
+ // If the operand's class is an SGPR, we can never move it.
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ break;
+
+ if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
+ SGPRReg = MI->getOperand(Idx).getReg();
+ break;
+ }
+ }
+ }
+
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
if (Idx == -1)
; SI-LABEL: @fneg_fold
; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: XOR
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in
; FUNC-LABEL: @fneg_fold
; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; SI-NOT: XOR
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
+; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in
; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%0 = shl i32 %x, %y
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: @main
+; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp oeq float %p, %p
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: @main
+; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp une float %p, %p