X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FR600InstrInfo.cpp;h=0281dd0cbdb472bc9b7c3b5e177270feb7507113;hb=2cf43de915e1a56052dc0fffe94329518b5eaa08;hp=3b1a24015a821da400b129b4a06d5a1b4c24cc54;hpb=cedcfee405a22b245e869abe8609f094df34085a;p=oota-llvm.git

diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 3b1a24015a8..0281dd0cbdb 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -23,7 +23,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
 #include "AMDGPUGenDFAPacketizer.inc"
 
 using namespace llvm;
@@ -51,9 +51,17 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const {
-  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
-      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
-    for (unsigned I = 0; I < 4; I++) {
+  unsigned VectorComponents = 0;
+  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
+      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+    VectorComponents = 4;
+  } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
+            AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
+    VectorComponents = 2;
+  }
+
+  if (VectorComponents > 0) {
+    for (unsigned I = 0; I < VectorComponents; I++) {
       unsigned SubRegIndex = RI.getSubRegFromChannel(I);
       buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                               RI.getSubReg(DestReg, SubRegIndex),
@@ -62,11 +70,6 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               RegState::Define | RegState::Implicit);
     }
   } else {
-
-    // We can't copy vec4 registers
-    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
-           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
-
     MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                   DestReg, SrcReg);
     NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
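(The copyPhysReg hunks above drop the old "can't copy vec4" assertion and
instead expand a wide copy into one 32-bit MOV per channel: four for a
128-bit register, two for a 64-bit register. Below is a minimal standalone
C++ sketch of that per-channel expansion; the mnemonic strings and channel
letters are illustrative stand-ins, not LLVM's API.)

#include <cstdio>

// Toy model of the per-channel copy expansion in copyPhysReg above.
// Register classes are reduced to a component count: 128-bit registers copy
// as 4 channels, 64-bit registers as 2, anything else as one scalar MOV.
static unsigned vectorComponents(bool Is128, bool Is64) {
  if (Is128) return 4;
  if (Is64)  return 2;
  return 0;
}

static void emitCopy(const char *Dst, const char *Src, bool Is128, bool Is64) {
  static const char Chan[4] = { 'X', 'Y', 'Z', 'W' };
  unsigned N = vectorComponents(Is128, Is64);
  if (N > 0) {
    // One MOV per channel: T0.X <- T1.X, T0.Y <- T1.Y, ...
    for (unsigned I = 0; I < N; ++I)
      std::printf("MOV %s.%c, %s.%c\n", Dst, Chan[I], Src, Chan[I]);
  } else {
    std::printf("MOV %s, %s\n", Dst, Src);
  }
}

int main() {
  emitCopy("T0", "T1", true, false);  // 128-bit: four channel copies
  emitCopy("T2", "T3", false, true);  // 64-bit: two channel copies
  emitCopy("T4", "T5", false, false); // scalar: a single MOV
  return 0;
}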
@@ -74,16 +77,16 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
 }
 
-MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
-                                             unsigned DstReg, int64_t Imm) const {
-  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
-  MachineInstrBuilder MIB(*MF, MI);
-  MIB.addReg(DstReg, RegState::Define);
-  MIB.addReg(AMDGPU::ALU_LITERAL_X);
-  MIB.addImm(Imm);
-  MIB.addReg(0); // PREDICATE_BIT
-
-  return MI;
+/// \returns true if \p MBBI can be moved into a new basic block.
+bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI) const {
+  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
+                                        E = MBBI->operands_end(); I != E; ++I) {
+    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
+        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+      return false;
+  }
+  return true;
 }
 
 unsigned R600InstrInfo::getIEQOpcode() const {
@@ -114,9 +117,7 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
 }
 
 bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
-  switch(Opcode) {
-    default: return false;
-  }
+  return false;
 }
 
 bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
@@ -136,14 +137,70 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
   return (TargetFlags & R600_InstFlag::ALU_INST);
 }
 
+bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
+  unsigned TargetFlags = get(Opcode).TSFlags;
+
+  return ((TargetFlags & R600_InstFlag::OP1) |
+          (TargetFlags & R600_InstFlag::OP2) |
+          (TargetFlags & R600_InstFlag::OP3));
+}
+
+bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
+  unsigned TargetFlags = get(Opcode).TSFlags;
+
+  return ((TargetFlags & R600_InstFlag::LDS_1A) |
+          (TargetFlags & R600_InstFlag::LDS_1A1D) |
+          (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+  if (isALUInstr(MI->getOpcode()))
+    return true;
+  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+    return true;
+  switch (MI->getOpcode()) {
+  case AMDGPU::PRED_X:
+  case AMDGPU::INTERP_PAIR_XY:
+  case AMDGPU::INTERP_PAIR_ZW:
+  case AMDGPU::INTERP_VEC_LOAD:
+  case AMDGPU::COPY:
+  case AMDGPU::DOT_4:
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
-  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
+  if (ST.hasCaymanISA())
+    return false;
+  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
 }
 
 bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
   return isTransOnly(MI->getOpcode());
 }
 
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+  return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
 bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
   return ST.hasVertexCache() && IS_VTX(get(Opcode));
 }
@@ -173,6 +230,66 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
   }
 }
 
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+  if (!isALUInstr(MI->getOpcode())) {
+    return false;
+  }
+  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+                                        E = MI->operands_end(); I != E; ++I) {
+    if (!I->isReg() || !I->isUse() ||
+        TargetRegisterInfo::isVirtualRegister(I->getReg()))
+      continue;
+
+    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+      return true;
+  }
+  return false;
+}
+
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+  static const unsigned OpTable[] = {
+    AMDGPU::OpName::src0,
+    AMDGPU::OpName::src1,
+    AMDGPU::OpName::src2
+  };
+
+  assert (SrcNum < 3);
+  return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+#define SRC_SEL_ROWS 11
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+  };
+
+  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
+    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
+      return getOperandIdx(Opcode, SrcSelTable[i][1]);
+    }
+  }
+  return -1;
+}
+#undef SRC_SEL_ROWS
+
 SmallVector<std::pair<MachineOperand *, int64_t>, 3>
 R600InstrInfo::getSrcs(MachineInstr *MI) const {
   SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
@@ -235,8 +352,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
 
 std::vector<std::pair<int, unsigned> >
 R600InstrInfo::ExtractSrcs(MachineInstr *MI,
-                           const DenseMap<unsigned, unsigned> &PV)
-  const {
+                           const DenseMap<unsigned, unsigned> &PV,
+                           unsigned &ConstCount) const {
+  ConstCount = 0;
   const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
   const std::pair<int, unsigned> DummyPair(-1, 0);
   std::vector<std::pair<int, unsigned> > Result;
@@ -244,15 +362,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
   for (unsigned n = Srcs.size(); i < n; ++i) {
     unsigned Reg = Srcs[i].first->getReg();
     unsigned Index = RI.getEncodingValue(Reg) & 0xff;
-    unsigned Chan = RI.getHWRegChan(Reg);
-    if (Index > 127) {
-      Result.push_back(DummyPair);
-      continue;
+    if (Reg == AMDGPU::OQAP) {
+      Result.push_back(std::pair<int, unsigned>(Index, 0));
     }
     if (PV.find(Reg) != PV.end()) {
+      // 255 is used to tell it's a PS/PV reg
+      Result.push_back(std::pair<int, unsigned>(255, 0));
+      continue;
+    }
+    if (Index > 127) {
+      ConstCount++;
       Result.push_back(DummyPair);
       continue;
     }
+    unsigned Chan = RI.getHWRegChan(Reg);
     Result.push_back(std::pair<int, unsigned>(Index, Chan));
   }
   for (; i < 3; ++i)
@@ -263,16 +386,18 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
 static std::vector<std::pair<int, unsigned> >
 Swizzle(std::vector<std::pair<int, unsigned> > Src,
         R600InstrInfo::BankSwizzle Swz) {
+  if (Src[0] == Src[1])
+    Src[1].first = -1;
   switch (Swz) {
-  case R600InstrInfo::ALU_VEC_012:
+  case R600InstrInfo::ALU_VEC_012_SCL_210:
     break;
-  case R600InstrInfo::ALU_VEC_021:
+  case R600InstrInfo::ALU_VEC_021_SCL_122:
     std::swap(Src[1], Src[2]);
     break;
-  case R600InstrInfo::ALU_VEC_102:
+  case R600InstrInfo::ALU_VEC_102_SCL_221:
     std::swap(Src[0], Src[1]);
     break;
-  case R600InstrInfo::ALU_VEC_120:
+  case R600InstrInfo::ALU_VEC_120_SCL_212:
     std::swap(Src[0], Src[1]);
     std::swap(Src[0], Src[2]);
     break;
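(The Swizzle() helper just above applies each BankSwizzle value as a fixed
permutation of the three vector-slot source operands, built from std::swap
calls. A self-contained sketch of the four permutations visible in this hunk
follows; the pair type stands in for the (register index, channel) pairs
produced by ExtractSrcs, and the remaining enum values are omitted.)

#include <cassert>
#include <utility>
#include <vector>

// Mirrors the swap sequences in Swizzle() above.
enum BankSwizzle {
  ALU_VEC_012_SCL_210,
  ALU_VEC_021_SCL_122,
  ALU_VEC_102_SCL_221,
  ALU_VEC_120_SCL_212
};

typedef std::pair<int, unsigned> SrcOp; // (register index, channel)

static std::vector<SrcOp> applySwizzle(std::vector<SrcOp> Src, BankSwizzle Swz) {
  switch (Swz) {
  case ALU_VEC_012_SCL_210: // identity
    break;
  case ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

int main() {
  std::vector<SrcOp> Src;
  Src.push_back(SrcOp(0, 0));
  Src.push_back(SrcOp(1, 1));
  Src.push_back(SrcOp(2, 2));
  // The two successive swaps rotate register order (0, 1, 2) to (2, 0, 1).
  std::vector<SrcOp> Out = applySwizzle(Src, ALU_VEC_120_SCL_212);
  assert(Out[0].first == 2 && Out[1].first == 0 && Out[2].first == 1);
  return 0;
}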
@@ -287,66 +412,185 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
   return Src;
 }
 
-static bool
-isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+  switch (Swz) {
+  case R600InstrInfo::ALU_VEC_012_SCL_210: {
+    unsigned Cycles[3] = { 2, 1, 0};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_021_SCL_122: {
+    unsigned Cycles[3] = { 1, 2, 2};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_120_SCL_212: {
+    unsigned Cycles[3] = { 2, 1, 2};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_102_SCL_221: {
+    unsigned Cycles[3] = { 2, 2, 1};
+    return Cycles[Op];
+  }
+  default:
+    llvm_unreachable("Wrong Swizzle for Trans Slot");
+    return 0;
+  }
+}
+
+/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations given a
+/// Swz swizzle sequence.
+unsigned R600InstrInfo::isLegalUpTo(
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
     const std::vector<R600InstrInfo::BankSwizzle> &Swz,
-    unsigned CheckedSize) {
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,
+    R600InstrInfo::BankSwizzle TransSwz) const {
   int Vector[4][3];
   memset(Vector, -1, sizeof(Vector));
-  for (unsigned i = 0; i < CheckedSize; i++) {
+  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
     const std::vector<std::pair<int, unsigned> > &Srcs =
         Swizzle(IGSrcs[i], Swz[i]);
     for (unsigned j = 0; j < 3; j++) {
       const std::pair<int, unsigned> &Src = Srcs[j];
-      if (Src.first < 0)
+      if (Src.first < 0 || Src.first == 255)
         continue;
+      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
+            // The value from output queue A (denoted by register OQAP) can
+            // only be fetched during the first cycle.
+            return false;
+        }
+        // OQAP does not count towards the normal read port restrictions
+        continue;
+      }
       if (Vector[Src.second][j] < 0)
         Vector[Src.second][j] = Src.first;
       if (Vector[Src.second][j] != Src.first)
-        return false;
+        return i;
     }
   }
-  return true;
+  // Now check Trans Alu
+  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+    const std::pair<int, unsigned> &Src = TransSrcs[i];
+    unsigned Cycle = getTransSwizzle(TransSwz, i);
+    if (Src.first < 0)
+      continue;
+    if (Src.first == 255)
+      continue;
+    if (Vector[Src.second][Cycle] < 0)
+      Vector[Src.second][Cycle] = Src.first;
+    if (Vector[Src.second][Cycle] != Src.first)
+      return IGSrcs.size() - 1;
+  }
+  return IGSrcs.size();
 }
 
-static bool recursiveFitsFPLimitation(
-const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
-std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
-unsigned Depth = 0) {
-  if (!isLegal(IGSrcs, SwzCandidate, Depth))
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic order) swizzle sequence assuming that all swizzles after
+/// Idx can be skipped
+static bool
+NextPossibleSolution(
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+    unsigned Idx) {
+  assert(Idx < SwzCandidate.size());
+  int ResetIdx = Idx;
+  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+    ResetIdx --;
+  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+  }
+  if (ResetIdx == -1)
     return false;
-  if (IGSrcs.size() == Depth)
-    return true;
-  unsigned i = SwzCandidate[Depth];
-  for (; i < 6; i++) {
-    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
-    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
+  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
+  return true;
+}
+
+/// Enumerate all possible swizzle sequences to find one that meets all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,
+    R600InstrInfo::BankSwizzle TransSwz) const {
+  unsigned ValidUpTo = 0;
+  do {
+    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+    if (ValidUpTo == IGSrcs.size())
       return true;
-  }
-  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
   return false;
 }
 
+/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
+/// read a const, and can't read a gpr at cycle 1 if they read 2 consts.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+                  const std::vector<std::pair<int, unsigned> > &TransOps,
+                  unsigned ConstCount) {
+  // TransALU can't read 3 constants
+  if (ConstCount > 2)
+    return false;
+  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+    const std::pair<int, unsigned> &Src = TransOps[i];
+    unsigned Cycle = getTransSwizzle(TransSwz, i);
+    if (Src.first < 0)
+      continue;
+    if (ConstCount > 0 && Cycle == 0)
+      return false;
+    if (ConstCount > 1 && Cycle == 1)
+      return false;
+  }
+  return true;
+}
+
 bool
 R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
-                                      const DenseMap<unsigned, unsigned> &PV,
-                                      std::vector<BankSwizzle> &ValidSwizzle)
+                                       const DenseMap<unsigned, unsigned> &PV,
+                                       std::vector<BankSwizzle> &ValidSwizzle,
+                                       bool isLastAluTrans)
     const {
   //Todo : support shared src0 - src1 operand
   std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
   ValidSwizzle.clear();
+  unsigned ConstCount;
+  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
   for (unsigned i = 0, e = IG.size(); i < e; ++i) {
-    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
     unsigned Op = getOperandIdx(IG[i]->getOpcode(),
         AMDGPU::OpName::bank_swizzle);
     ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
         IG[i]->getOperand(Op).getImm());
   }
-  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
-  if (!Result)
-    return false;
-  return true;
+  std::vector<std::pair<int, unsigned> > TransOps;
+  if (!isLastAluTrans)
+    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+  TransOps = IGSrcs.back();
+  IGSrcs.pop_back();
+  ValidSwizzle.pop_back();
+
+  static const R600InstrInfo::BankSwizzle TransSwz[] = {
+    ALU_VEC_012_SCL_210,
+    ALU_VEC_021_SCL_122,
+    ALU_VEC_120_SCL_212,
+    ALU_VEC_102_SCL_221
+  };
+  for (unsigned i = 0; i < 4; i++) {
+    TransBS = TransSwz[i];
+    if (!isConstCompatible(TransBS, TransOps, ConstCount))
+      continue;
+    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+        TransBS);
+    if (Result) {
+      ValidSwizzle.push_back(TransBS);
+      return true;
+    }
+  }
+
+  return false;
 }
@@ -376,18 +620,24 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
 }
 
 bool
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+    const {
   std::vector<unsigned> Consts;
+  SmallSet<int64_t, 4> Literals;
   for (unsigned i = 0, n = MIs.size(); i < n; i++) {
     MachineInstr *MI = MIs[i];
     if (!isALUInstr(MI->getOpcode()))
      continue;
 
-    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
+    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);
 
     for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
       std::pair<MachineOperand *, int64_t> Src = Srcs[j];
+      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+        Literals.insert(Src.second);
+      if (Literals.size() > 4)
+        return false;
       if (Src.first->getReg() == AMDGPU::ALU_CONST)
         Consts.push_back(Src.second);
       if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
@@ -435,6 +685,11 @@ bool isJump(unsigned Opcode) {
   return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
 }
 
+static bool isBranch(unsigned Opcode) {
+  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
+      Opcode == AMDGPU::BRANCH_COND_f32;
+}
+
 bool
 R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                              MachineBasicBlock *&TBB,
@@ -453,11 +708,21 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
+  // AMDGPU::BRANCH* instructions are only available after isel and are not
+  // handled
+  if (isBranch(I->getOpcode()))
+    return true;
   if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
     return false;
   }
 
-  // Get the last instruction in the block.
+  // Remove successive JUMP
+  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
+    MachineBasicBlock::iterator PriorI = std::prev(I);
+    if (AllowModify)
+      I->removeFromParent();
+    I = PriorI;
+  }
   MachineInstr *LastInst = I;
 
   // If there is only one terminator instruction, process it.
@@ -513,6 +778,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
   };
 }
 
+static
+MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
+      It != E; ++It) {
+    if (It->getOpcode() == AMDGPU::CF_ALU ||
+        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+      return std::prev(It.base());
+  }
+  return MBB.end();
+}
+
 unsigned
 R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *TBB,
@@ -534,6 +810,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
       BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
              .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+      if (CfAlu == MBB.end())
+        return 1;
+      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
       return 1;
     }
   } else {
@@ -545,6 +826,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
     BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+    if (CfAlu == MBB.end())
+      return 2;
+    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
     return 2;
   }
 }
@@ -568,6 +854,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
       MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
       clearFlag(predSet, 0, MO_FLAG_PUSH);
       I->eraseFromParent();
+      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+      if (CfAlu == MBB.end())
+        break;
+      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+      CfAlu->setDesc(get(AMDGPU::CF_ALU));
       break;
     }
   case AMDGPU::JUMP:
@@ -588,6 +879,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 0, MO_FLAG_PUSH);
      I->eraseFromParent();
+      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+      if (CfAlu == MBB.end())
+        break;
+      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+      CfAlu->setDesc(get(AMDGPU::CF_ALU));
       break;
     }
   case AMDGPU::JUMP:
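(The InsertBranch/RemoveBranch hunks above keep the enclosing ALU clause
marker in sync with the branch: inserting a conditional jump retags the last
CF_ALU as CF_ALU_PUSH_BEFORE, and removing the jump undoes the retag. A toy
model of that find-and-retag pattern follows; plain enum values stand in for
MachineInstr opcodes, where the real code walks the block with a
reverse_iterator and calls setDesc().)

#include <cassert>
#include <vector>

enum Opcode { CF_ALU, CF_ALU_PUSH_BEFORE, JUMP_COND, OTHER };

// Index of the last clause marker, or -1 if there is none
// (mirrors FindLastAluClause returning MBB.end()).
static int findLastAluClause(const std::vector<Opcode> &Block) {
  for (int i = (int)Block.size() - 1; i >= 0; --i)
    if (Block[i] == CF_ALU || Block[i] == CF_ALU_PUSH_BEFORE)
      return i;
  return -1;
}

static void insertCondBranch(std::vector<Opcode> &Block) {
  Block.push_back(JUMP_COND);
  int Clause = findLastAluClause(Block);
  if (Clause != -1)
    Block[Clause] = CF_ALU_PUSH_BEFORE; // matches CfAlu->setDesc(...)
}

int main() {
  std::vector<Opcode> Block;
  Block.push_back(CF_ALU);
  Block.push_back(OTHER);
  insertCondBranch(Block);
  assert(Block[0] == CF_ALU_PUSH_BEFORE && Block[2] == JUMP_COND);
  return 0;
}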
@@ -622,6 +918,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
 
   if (MI->getOpcode() == AMDGPU::KILLGT) {
     return false;
+  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
+    // If the clause starts in the middle of the MBB then the MBB has more
+    // than a single clause; we are unable to predicate several clauses.
+    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
+      return false;
+    // TODO: We don't support KC merging atm
+    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
+      return false;
+    return true;
   } else if (isVector(*MI)) {
     return false;
   } else {
@@ -717,6 +1022,25 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                       const SmallVectorImpl<MachineOperand> &Pred) const {
   int PIdx = MI->findFirstPredOperandIdx();
 
+  if (MI->getOpcode() == AMDGPU::CF_ALU) {
+    MI->getOperand(8).setImm(0);
+    return true;
+  }
+
+  if (MI->getOpcode() == AMDGPU::DOT_4) {
+    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
+        .setReg(Pred[2].getReg());
+    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
+        .setReg(Pred[2].getReg());
+    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
+        .setReg(Pred[2].getReg());
+    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
+        .setReg(Pred[2].getReg());
+    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+    return true;
+  }
+
   if (PIdx != -1) {
     MachineOperand &PMO = MI->getOperand(PIdx);
     PMO.setReg(Pred[2].getReg());
@@ -728,6 +1052,10 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
   return false;
 }
 
+unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
+  return 2;
+}
+
 unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                             const MachineInstr *MI,
                                             unsigned *PredCost) const {
@@ -736,67 +1064,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   return 2;
 }
 
-int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  int Offset = 0;
-
-  if (MFI->getNumObjects() == 0) {
-    return -1;
-  }
-
-  if (MRI.livein_empty()) {
-    return 0;
-  }
-
-  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
-                                            LE = MRI.livein_end();
-       LI != LE; ++LI) {
-    Offset = std::max(Offset,
-                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
-  }
-
-  return Offset + 1;
-}
-
-int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
-  int Offset = 0;
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Variable sized objects are not supported
-  assert(!MFI->hasVarSizedObjects());
-
-  if (MFI->getNumObjects() == 0) {
-    return -1;
-  }
-
-  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
-
-  return getIndirectIndexBegin(MF) + Offset;
-}
-
-std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                              const MachineFunction &MF) const {
   const AMDGPUFrameLowering *TFL =
                  static_cast<const AMDGPUFrameLowering *>(TM.getFrameLowering());
-  std::vector<unsigned> Regs;
 
   unsigned StackWidth = TFL->getStackWidth(MF);
   int End = getIndirectIndexEnd(MF);
 
-  if (End == -1) {
-    return Regs;
-  }
+  if (End == -1)
+    return;
 
   for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
     unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
-    Regs.push_back(SuperReg);
+    Reserved.set(SuperReg);
     for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
-      Regs.push_back(Reg);
+      Reserved.set(Reg);
     }
   }
-  return Regs;
 }
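(reserveIndirectRegisters above replaces the old getIndirectReservedRegs
vector with direct BitVector updates: for every indirect-address slot it
reserves the 128-bit super-register plus one 32-bit T-register per stack
channel. A toy sketch of that loop follows; the register numbering is
invented for illustration, where the real code maps Index through the
TableGen-generated register classes.)

#include <bitset>
#include <cassert>

static const unsigned MAX_REGS = 1024;

static void reserveIndirect(std::bitset<MAX_REGS> &Reserved,
                            int Begin, int End, unsigned StackWidth) {
  if (End == -1) // no indirect addressing in this function
    return;
  for (int Index = Begin; Index <= End; ++Index) {
    unsigned SuperReg = 512 + Index;  // stand-in for R600_Reg128[Index]
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan)
      Reserved.set(4 * Index + Chan); // stand-in for the R600_TReg32 regs
  }
}

int main() {
  std::bitset<MAX_REGS> Reserved;
  reserveIndirect(Reserved, 2, 3, 2); // slots 2..3, stack two channels wide
  assert(Reserved.test(514) && Reserved.test(8) && Reserved.test(9));
  assert(!Reserved.test(10)); // channels beyond the stack width stay free
  return 0;
}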
@@ -806,13 +1092,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
   return RegIndex;
 }
 
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
-                                                     unsigned SourceReg) const {
-  return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
-  return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+  return &AMDGPU::R600_TReg32_XRegClass;
 }
 
 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -851,10 +1132,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   return Mov;
 }
 
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
-  return &AMDGPU::IndirectRegRegClass;
-}
-
 unsigned R600InstrInfo::getMaxAlusPerClause() const {
   return 115;
 }
@@ -972,6 +1249,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
     AMDGPU::OpName::src1_sel,
   };
 
+  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
+  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+      .setReg(MO.getReg());
+
   for (unsigned i = 0; i < 14; i++) {
     MachineOperand &MO = MI->getOperand(
         getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
@@ -992,6 +1274,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
   return MovImm;
 }
 
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned DstReg, unsigned SrcReg) const {
+  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
   return getOperandIdx(MI.getOpcode(), Op);
 }
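(Taken together, NextPossibleSolution and FindSwizzleForVectorSlot in this
patch implement a pruned lexicographic search: each instruction in the group
gets a candidate swizzle, isLegalUpTo reports how many leading instructions
the candidate satisfies, and the sequence is "incremented" at the first
failing position so every combination behind it is skipped. The standalone
sketch below shows that search shape with a stand-in legality predicate in
place of the real read-port model.)

#include <cassert>
#include <vector>

static const int NUM_SWIZZLES = 6; // ALU_VEC_012_SCL_210 ... ALU_VEC_210

static bool nextPossibleSolution(std::vector<int> &Swz, unsigned Idx) {
  int ResetIdx = (int)Idx;
  // Carry past positions already at the last swizzle value.
  while (ResetIdx > -1 && Swz[ResetIdx] == NUM_SWIZZLES - 1)
    --ResetIdx;
  // Everything after the incremented digit restarts at the first swizzle.
  for (unsigned i = ResetIdx + 1; i < Swz.size(); ++i)
    Swz[i] = 0;
  if (ResetIdx == -1)
    return false; // the whole search space is exhausted
  ++Swz[ResetIdx];
  return true;
}

// Stand-in for isLegalUpTo: pretend instruction i is satisfied only when its
// swizzle equals i % NUM_SWIZZLES, and report the first failing position.
static unsigned legalUpTo(const std::vector<int> &Swz) {
  for (unsigned i = 0; i < Swz.size(); ++i)
    if (Swz[i] != (int)(i % NUM_SWIZZLES))
      return i;
  return Swz.size();
}

static bool findSwizzle(std::vector<int> &Swz) {
  unsigned ValidUpTo = 0;
  do { // same do/while shape as FindSwizzleForVectorSlot
    ValidUpTo = legalUpTo(Swz);
    if (ValidUpTo == Swz.size())
      return true;
  } while (nextPossibleSolution(Swz, ValidUpTo));
  return false;
}

int main() {
  std::vector<int> Swz(4, 0);
  bool Found = findSwizzle(Swz);
  assert(Found && Swz[0] == 0 && Swz[1] == 1 && Swz[2] == 2 && Swz[3] == 3);
  return 0;
}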