1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
12 /// This pass merges consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
19 #include "R600MachineFunctionInfo.h"
20 #include "R600RegisterInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define DEBUG_TYPE "r600mergeclause"
// File-local opcode predicate used throughout this pass to recognise CF ALU
// clause markers. It dispatches on MI's opcode; CF_ALU_PUSH_BEFORE is one of
// the opcodes it has a case for.
// NOTE(review): the remaining case labels and the returned values are not
// visible here — confirm against the full definition.
33 static bool isCFAlu(const MachineInstr *MI) {
34 switch (MI->getOpcode()) {
36 case AMDGPU::CF_ALU_PUSH_BEFORE:
// Machine-function pass that merges consecutive CF ALU clause markers (see the
// file header comment for the motivation).
43 class R600ClauseMergePass : public MachineFunctionPass {
// Target instruction info; assigned at the start of runOnMachineFunction.
47 const R600InstrInfo *TII;
// Returns the immediate of MI's COUNT operand.
49 unsigned getCFAluSize(const MachineInstr *MI) const;
// Returns the immediate of MI's Enabled operand, converted to bool.
50 bool isCFAluEnabled(const MachineInstr *MI) const;
52 /// The IfCvt pass can generate "disabled" ALU clause markers that need to be
53 /// removed, with their content attributed to the previous ALU clause.
54 /// This function scans the instructions after CFAlu until it finds a disabled
55 /// CFAlu and merges its content, or finds an enabled CFAlu.
56 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
58 /// Check whether LatrCFAlu can be merged into RootCFAlu and, if so, do it.
60 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
// The TargetMachine argument is not used by this constructor; only the pass ID
// is forwarded to the base class.
64 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
// Pass entry point, run once per machine function.
66 bool runOnMachineFunction(MachineFunction &MF) override;
// Human-readable name of the pass.
68 const char *getPassName() const override;
// Definition of the pass identifier; the constructor forwards ID to the
// MachineFunctionPass base class.
71 char R600ClauseMergePass::ID = 0;
// Returns the immediate stored in MI's COUNT operand. The operand index is
// looked up through TII using MI's own opcode.
// Presumably this is the number of ALU instructions in the clause: callers sum
// these values and compare the total against TII->getMaxAlusPerClause().
73 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
75 return MI->getOperand(
76 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
// Returns true when the immediate of MI's Enabled operand is non-zero. The
// operand index is looked up through TII using MI's own opcode.
79 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
81 return MI->getOperand(
82 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
// Folds a following CF ALU marker into CFAlu: the marker's COUNT is added to
// CFAlu's COUNT and the marker is erased from its block.
// An isCFAluEnabled() test on the marker guards the fold; per the comment on
// the declaration, an enabled marker ends the scan.
// NOTE(review): the statement controlled by that test and the enclosing loop
// are not visible here — confirm.
85 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
// COUNT operand index, looked up for the CF_ALU opcode.
87 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
// Scan range: from CFAlu up to the end of the basic block that contains it.
88 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
// Skip over instructions that are not CF ALU markers.
91 while (I!= E && !isCFAlu(I))
// Grab the marker and step the iterator past it first, because MI may be
// erased below.
95 MachineInstr *MI = I++;
96 if (isCFAluEnabled(MI))
// Absorb MI's count into CFAlu, then drop MI.
98 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
99 MI->eraseFromParent();
103 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
104 const MachineInstr *LatrCFAlu) const {
105 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
106 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
107 unsigned RootInstCount = getCFAluSize(RootCFAlu),
108 LaterInstCount = getCFAluSize(LatrCFAlu);
109 unsigned CumuledInsts = RootInstCount + LaterInstCount;
110 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
111 DEBUG(dbgs() << "Excess inst counts\n");
114 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
116 // Is KCache Bank 0 compatible ?
118 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
120 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
122 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
123 if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
124 RootCFAlu->getOperand(Mode0Idx).getImm() &&
125 (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
126 RootCFAlu->getOperand(KBank0Idx).getImm() ||
127 LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
128 RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
129 DEBUG(dbgs() << "Wrong KC0\n");
132 // Is KCache Bank 1 compatible ?
134 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
136 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
138 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
139 if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
140 RootCFAlu->getOperand(Mode1Idx).getImm() &&
141 (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
142 RootCFAlu->getOperand(KBank1Idx).getImm() ||
143 LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
144 RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
145 DEBUG(dbgs() << "Wrong KC0\n");
148 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
149 RootCFAlu->getOperand(Mode0Idx).setImm(
150 LatrCFAlu->getOperand(Mode0Idx).getImm());
151 RootCFAlu->getOperand(KBank0Idx).setImm(
152 LatrCFAlu->getOperand(KBank0Idx).getImm());
153 RootCFAlu->getOperand(KBank0LineIdx).setImm(
154 LatrCFAlu->getOperand(KBank0LineIdx).getImm());
156 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
157 RootCFAlu->getOperand(Mode1Idx).setImm(
158 LatrCFAlu->getOperand(Mode1Idx).getImm());
159 RootCFAlu->getOperand(KBank1Idx).setImm(
160 LatrCFAlu->getOperand(KBank1Idx).getImm());
161 RootCFAlu->getOperand(KBank1LineIdx).setImm(
162 LatrCFAlu->getOperand(KBank1LineIdx).getImm());
164 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
165 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
// Pass entry point. Caches the target's R600InstrInfo in TII, then visits the
// basic blocks of MF; within a block it tracks the most recent CF ALU marker
// and tries to merge each later marker into it.
// NOTE(review): the loop headers, the statements controlled by the first `if`,
// the else branch and the return value are not visible here — confirm.
169 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
170 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
171 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
173 MachineBasicBlock &MBB = *BB;
174 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
// E doubles as the "no merge candidate yet" sentinel (tested with != E below).
175 MachineBasicBlock::iterator LatestCFAlu = E;
// Step the iterator before using MI, since MI may be erased further down.
177 MachineInstr *MI = I++;
// True for an instruction that is neither ALU-like nor a CF ALU marker, or
// whose opcode must be last in its clause.
// Presumably such an instruction invalidates the current merge candidate.
178 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
179 TII->mustBeLastInClause(MI->getOpcode()))
// Fold disabled markers that follow MI into MI before attempting a merge.
183 cleanPotentialDisabledCFAlu(MI);
// MI's clause was absorbed into LatestCFAlu, so MI itself is removed.
185 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
186 MI->eraseFromParent();
// NOTE(review): operand index 8 is hard-coded; it looks like it is meant to be
// the Enabled operand that isCFAluEnabled() looks up by name — confirm.
188 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
// Returns the human-readable name of this pass as a string literal.
196 const char *R600ClauseMergePass::getPassName() const {
197 return "R600 Merge Clause Markers Pass";
200 } // end anonymous namespace
// Factory for the pass: returns a newly heap-allocated R600ClauseMergePass
// constructed from TM. The returned pointer is not freed here.
203 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
204 return new R600ClauseMergePass(TM);