#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-namespace llvm {
+using namespace llvm;
+
+namespace {
class R600EmitClauseMarkersPass : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
+ int Address;
unsigned OccupiedDwords(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT4_eg_pseudo:
- case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT_4:
return 4;
case AMDGPU::KILL:
return 0;
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::COPY:
- case AMDGPU::DOT4_eg_pseudo:
- case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT_4:
return true;
default:
return false;
bool SubstituteKCacheBank(MachineInstr *MI,
std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
- const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
+ const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts =
TII->getSrcs(MI);
- assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+ assert((TII->isALUInstr(MI->getOpcode()) ||
+ MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
}
MachineBasicBlock::iterator
- MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
bool PushBeforeModifier = false;
AluInstCount ++;
continue;
}
- if (I->getOpcode() == AMDGPU::KILLGT) {
+ // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
+ //
+ // * KILL or INTERP instructions
+ // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
+ // * Any instruction that uses waterfalling (i.e. INDEX_MODE = AR.X)
+ //
+ // XXX: These checks have not been implemented yet.
+ if (TII->mustBeLastInClause(I->getOpcode())) {
I++;
break;
}
if (TII->isALUInstr(I->getOpcode()) &&
!SubstituteKCacheBank(I, KCacheBanks))
break;
+ if (I->getOpcode() == AMDGPU::DOT_4 &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
AluInstCount += OccupiedDwords(I);
}
unsigned Opcode = PushBeforeModifier ?
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
- .addImm(0) // ADDR
+ // We don't use the ADDR field until the R600ControlFlowFinalizer pass, where
+ // it is safe to assume it is 0. However, if we always put 0 here, the ifcvt
+ // pass may assume that identical ALU clause starters at the beginning of the
+ // true and false branches can be factorized, which is not the case.
+ .addImm(Address++) // ADDR
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
.addImm(KCacheBanks.empty()?0:2) // KM0
.addImm((KCacheBanks.size() < 2)?0:2) // KM1
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
- .addImm(AluInstCount); // COUNT
+ .addImm(AluInstCount) // COUNT
+ .addImm(1); // Enabled
return I;
}
public:
R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+ TII(0), Address(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
char R600EmitClauseMarkersPass::ID = 0;
-}
+} // end anonymous namespace
llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {