1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
12 /// This pass is merging consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
16 #define DEBUG_TYPE "r600mergeclause"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
/// Helper that switches on MI's opcode; AMDGPU::CF_ALU_PUSH_BEFORE is one of
/// the opcodes it handles.
/// NOTE(review): presumably returns true exactly for the CF ALU clause-marker
/// opcodes -- the remaining cases and the return statements are not visible
/// here; confirm against the full definition.
32 static bool isCFAlu(const MachineInstr *MI) {
33 switch (MI->getOpcode()) {
35 case AMDGPU::CF_ALU_PUSH_BEFORE:
/// Machine function pass that merges CF ALU clause markers (see
/// mergeIfPossible and cleanPotentialDisabledCFAlu below).
42 class R600ClauseMergePass : public MachineFunctionPass {
// Target instruction info; assigned at the start of runOnMachineFunction.
46 const R600InstrInfo *TII;
// Reads the immediate of MI's COUNT operand.
48 unsigned getCFAluSize(const MachineInstr *MI) const;
// Reads the immediate of MI's Enabled operand.
49 bool isCFAluEnabled(const MachineInstr *MI) const;
51 /// The IfCvt pass can generate "disabled" ALU clause markers that need to be
52 /// removed, with their content folded into the previous ALU clause.
53 /// This function scans the instructions after CFAlu until it finds either a
54 /// disabled CFAlu, whose content it merges, or an enabled CFAlu.
55 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
57 /// Check whether LatrCFAlu can be merged into RootCFAlu and, if so, do it.
59 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
// The TargetMachine argument is not used by the constructor.
63 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
65 virtual bool runOnMachineFunction(MachineFunction &MF);
67 const char *getPassName() const;
// Definition of the static pass ID member; this is the value the constructor
// hands to MachineFunctionPass.
70 char R600ClauseMergePass::ID = 0;
/// Returns the immediate stored in MI's COUNT operand. The operand index is
/// looked up by name for MI's own opcode.
72 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
74 return MI->getOperand(
75 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
/// Returns the immediate stored in MI's Enabled operand, converted to bool
/// (non-zero means enabled). The operand index is looked up by name for MI's
/// own opcode.
78 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
80 return MI->getOperand(
81 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
/// Folds a following disabled CF ALU marker into CFAlu: the disabled marker's
/// COUNT is added to CFAlu's COUNT and the disabled marker is erased from its
/// basic block.
84 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
// COUNT operand index, resolved for the plain CF_ALU opcode.
86 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
// Scan from CFAlu to the end of its parent basic block.
87 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
// Loop while I is in range and is not a CF ALU marker.
// NOTE(review): the loop body is not visible here -- presumably it only
// advances I; confirm.
90 while (I!= E && !isCFAlu(I))
// Step I past MI before MI is possibly erased below.
94 MachineInstr *MI = I++;
// NOTE(review): the statement guarded by this test is not visible here --
// presumably an enabled marker ends the scan; confirm.
95 if (isCFAluEnabled(MI))
// Add MI's instruction count to CFAlu's COUNT, then delete MI.
97 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
98 MI->eraseFromParent();
/// Try to merge the CF ALU marker LatrCFAlu into the earlier marker RootCFAlu.
///
/// Checks visible in this function:
///  - the summed COUNT of both markers is compared against
///    TII->getMaxAlusPerClause() (">=" is treated as excess);
///  - RootCFAlu is tested for the CF_ALU_PUSH_BEFORE opcode;
///  - for each of the two KCache banks, when both markers have a non-zero
///    mode, their bank and address operands must be equal.
/// On the merge path, LatrCFAlu's non-zero KCache settings are copied into
/// RootCFAlu, RootCFAlu's COUNT becomes the sum, and RootCFAlu takes
/// LatrCFAlu's opcode descriptor.
/// NOTE(review): the return statements are not visible here -- presumably
/// false after each failed check and true after the merge; confirm.
102 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
103 const MachineInstr *LatrCFAlu) const {
104 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
// All operand indices below are resolved for the plain CF_ALU opcode and then
// applied to both instructions.
105 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
106 unsigned RootInstCount = getCFAluSize(RootCFAlu),
107 LaterInstCount = getCFAluSize(LatrCFAlu);
108 unsigned CumuledInsts = RootInstCount + LaterInstCount;
109 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
110 DEBUG(dbgs() << "Excess inst counts\n");
113 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
115 // Is KCache Bank 0 compatible ?
// Bank 0 operand indices (mode, bank, address). The declarations these
// expressions initialize (Mode0Idx, KBank0Idx, KBank0LineIdx, used below) are
// not visible here.
117 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
// Conflict only when both markers use bank 0 (non-zero mode) and disagree on
// the bank or the address.
122 if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
123 RootCFAlu->getOperand(Mode0Idx).getImm() &&
124 (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
125 RootCFAlu->getOperand(KBank0Idx).getImm() ||
126 LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
127 RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
128 DEBUG(dbgs() << "Wrong KC0\n");
131 // Is KCache Bank 1 compatible ?
// Bank 1 operand indices (mode, bank, address); declarations of Mode1Idx,
// KBank1Idx and KBank1LineIdx are likewise not visible here.
133 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
138 if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
139 RootCFAlu->getOperand(Mode1Idx).getImm() &&
140 (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
141 RootCFAlu->getOperand(KBank1Idx).getImm() ||
142 LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
143 RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
// Report the bank that actually conflicted (this message previously repeated
// the bank 0 text).
144 DEBUG(dbgs() << "Wrong KC1\n");
// Merge path: when LatrCFAlu uses bank 0, its mode/bank/address overwrite
// RootCFAlu's.
147 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
148 RootCFAlu->getOperand(Mode0Idx).setImm(
149 LatrCFAlu->getOperand(Mode0Idx).getImm());
150 RootCFAlu->getOperand(KBank0Idx).setImm(
151 LatrCFAlu->getOperand(KBank0Idx).getImm());
152 RootCFAlu->getOperand(KBank0LineIdx).setImm(
153 LatrCFAlu->getOperand(KBank0LineIdx).getImm());
// Same for bank 1.
155 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
156 RootCFAlu->getOperand(Mode1Idx).setImm(
157 LatrCFAlu->getOperand(Mode1Idx).getImm());
158 RootCFAlu->getOperand(KBank1Idx).setImm(
159 LatrCFAlu->getOperand(KBank1Idx).getImm());
160 RootCFAlu->getOperand(KBank1LineIdx).setImm(
161 LatrCFAlu->getOperand(KBank1LineIdx).getImm());
// The merged clause covers both instruction counts and adopts the later
// marker's opcode.
163 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
164 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
/// Pass entry point. Caches the target's R600InstrInfo in TII, then walks the
/// instructions of every basic block in MF, cleaning up disabled CF ALU
/// markers and merging a marker into the latest preceding one when
/// mergeIfPossible succeeds.
/// NOTE(review): several statements of this function (loop headers, guarded
/// statements, the return) are not visible here; the comments below describe
/// only the visible lines.
168 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
// Cache the target instruction info used by all helper methods.
169 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
170 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
172 MachineBasicBlock &MBB = *BB;
173 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
// LatestCFAlu == E encodes "no merge root yet" (see the test guarding
// mergeIfPossible below).
174 MachineBasicBlock::iterator LatestCFAlu = E;
// Step I past MI first, because MI may be erased further down.
176 MachineInstr *MI = I++;
// True for instructions that are neither ALU-like nor CF ALU markers, or that
// must be last in a clause.
// NOTE(review): the statement guarded by this test is not visible here.
177 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
178 TII->mustBeLastInClause(MI->getOpcode()))
// Fold a following disabled marker into MI before attempting a merge.
182 cleanPotentialDisabledCFAlu(MI);
// When a root marker exists and MI was merged into it, MI is redundant.
184 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
185 MI->eraseFromParent();
// Operand 8 is read by raw index here.
// NOTE(review): presumably this is the Enabled operand, in which case
// isCFAluEnabled(MI) would express the same check by name -- confirm.
187 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
/// Returns the fixed, human-readable name of this pass.
195 const char *R600ClauseMergePass::getPassName() const {
196 return "R600 Merge Clause Markers Pass";
199 } // end anonymous namespace
/// Factory: heap-allocates a new R600ClauseMergePass constructed from TM and
/// returns it as a FunctionPass pointer.
/// NOTE(review): the caller presumably takes ownership of the returned pass --
/// confirm against the call site.
202 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
203 return new R600ClauseMergePass(TM);