From: Vincent Lejeune Date: Fri, 17 May 2013 16:50:02 +0000 (+0000) Subject: R600: Some factorization X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=25c209e9a262b623deca60fb6b886907e22c941b;p=oota-llvm.git R600: Some factorization git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182123 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index f1e07326e27..65a4801be03 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -172,22 +172,20 @@ private: AMDGPU::ALU_LITERAL_Z, AMDGPU::ALU_LITERAL_W }; - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.getReg() != AMDGPU::ALU_LITERAL_X) + const SmallVector, 3 > Srcs = + TII->getSrcs(MI); + for (unsigned i = 0, e = Srcs.size(); i < e; ++i) { + if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X) continue; - unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); - int64_t Imm = MI->getOperand(ImmIdx).getImm(); + int64_t Imm = Srcs[i].second; std::vector::iterator It = std::find(Lits.begin(), Lits.end(), Imm); if (It != Lits.end()) { unsigned Index = It - Lits.begin(); - MO.setReg(LiteralRegs[Index]); + Srcs[i].first->setReg(LiteralRegs[Index]); } else { assert(Lits.size() < 4 && "Too many literals in Instruction Group"); - MO.setReg(LiteralRegs[Lits.size()]); + Srcs[i].first->setReg(LiteralRegs[Lits.size()]); Lits.push_back(Imm); } } diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index 3fdc678b9ef..bae39c5c1fc 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -89,31 +89,6 @@ private: } } - // Register Idx, then Const value - std::vector > ExtractConstRead(MachineInstr *MI) - const { - const R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - std::vector > Result; - - if (!TII->isALUInstr(MI->getOpcode())) - return Result; - for (unsigned j = 0; j < 3; j++) { - int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]); - if (SrcIdx < 0) - break; - if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { - unsigned Const = MI->getOperand( - TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); - Result.push_back(std::pair(SrcIdx, Const)); - } - } - return Result; - } - std::pair getAccessedBankLine(unsigned Sel) const { // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 // (See also R600ISelLowering.cpp) @@ -131,9 +106,12 @@ private: bool SubstituteKCacheBank(MachineInstr *MI, std::vector > &CachedConsts) const { std::vector > UsedKCache; - std::vector > Consts = ExtractConstRead(MI); + const SmallVector, 3> &Consts = + TII->getSrcs(MI); assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const"); for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + continue; unsigned Sel = Consts[i].second; unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; unsigned KCacheIndex = Index * 4 + Chan; @@ -159,19 +137,22 @@ private: return false; } - for (unsigned i = 0, n = Consts.size(); i < n; ++i) { - switch(UsedKCache[i].first) { + for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { + if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) + continue; + switch(UsedKCache[j].first) { case 0: - MI->getOperand(Consts[i].first).setReg( - AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second)); + Consts[i].first->setReg( + AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); break; case 1: - MI->getOperand(Consts[i].first).setReg( - AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second)); + Consts[i].first->setReg( + AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); break; default: llvm_unreachable("Wrong Cache Line"); } + j++; } return true; } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 37150c430d2..56a8caf9c94 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -168,6 +168,156 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { usesTextureCache(MI->getOpcode()); } +SmallVector, 3> +R600InstrInfo::getSrcs(MachineInstr *MI) const { + SmallVector, 3> Result; + + static const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + MachineOperand &MO = MI->getOperand(SrcIdx); + unsigned Reg = MI->getOperand(SrcIdx).getReg(); + if (Reg == AMDGPU::ALU_CONST) { + unsigned Sel = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Result.push_back(std::pair(&MO, Sel)); + continue; + } + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned Imm = MI->getOperand( + getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm(); + Result.push_back(std::pair(&MO, Imm)); + continue; + } + Result.push_back(std::pair(&MO, 0)); + } + return Result; +} + +std::vector > +R600InstrInfo::ExtractSrcs(MachineInstr *MI, + const DenseMap &PV) + const { + const SmallVector, 3> Srcs = getSrcs(MI); + const std::pair DummyPair(-1, 0); + std::vector > Result; + unsigned i = 0; + for (unsigned n = Srcs.size(); i < n; ++i) { + unsigned Reg = Srcs[i].first->getReg(); + unsigned Index = RI.getEncodingValue(Reg) & 0xff; + unsigned Chan = RI.getHWRegChan(Reg); + if (Index > 127) { + Result.push_back(DummyPair); + continue; + } + if (PV.find(Index) != PV.end()) { + Result.push_back(DummyPair); + continue; + } + Result.push_back(std::pair(Index, Chan)); + } + for (; i < 3; ++i) + Result.push_back(DummyPair); + return Result; +} + +static std::vector > +Swizzle(std::vector > Src, + R600InstrInfo::BankSwizzle Swz) { + switch (Swz) { + case R600InstrInfo::ALU_VEC_012: + break; + case R600InstrInfo::ALU_VEC_021: + std::swap(Src[1], Src[2]); + break; + case R600InstrInfo::ALU_VEC_102: + std::swap(Src[0], Src[1]); + break; + case R600InstrInfo::ALU_VEC_120: + std::swap(Src[0], Src[1]); + std::swap(Src[0], Src[2]); + break; + case R600InstrInfo::ALU_VEC_201: + std::swap(Src[0], Src[2]); + std::swap(Src[0], Src[1]); + break; + case R600InstrInfo::ALU_VEC_210: + std::swap(Src[0], Src[2]); + break; + } + return Src; +} + +static bool +isLegal(const std::vector > > &IGSrcs, + const std::vector &Swz, + unsigned CheckedSize) { + int Vector[4][3]; + memset(Vector, -1, sizeof(Vector)); + for (unsigned i = 0; i < CheckedSize; i++) { + const std::vector > &Srcs = + Swizzle(IGSrcs[i], Swz[i]); + for (unsigned j = 0; j < 3; j++) { + const std::pair &Src = Srcs[j]; + if (Src.first < 0) + continue; + if (Vector[Src.second][j] < 0) + Vector[Src.second][j] = Src.first; + if (Vector[Src.second][j] != Src.first) + return false; + } + } + return true; +} + +static bool recursiveFitsFPLimitation( +const std::vector > > &IGSrcs, +std::vector &SwzCandidate, +unsigned Depth = 0) { + if (!isLegal(IGSrcs, SwzCandidate, Depth)) + return false; + if (IGSrcs.size() == Depth) + return true; + unsigned i = SwzCandidate[Depth]; + for (; i < 6; i++) { + SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i; + if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1)) + return true; + } + SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012; + return false; +} + +bool +R600InstrInfo::fitsReadPortLimitations(const std::vector &IG, + const DenseMap &PV, + std::vector &ValidSwizzle) + const { + //Todo : support shared src0 - src1 operand + + std::vector > > IGSrcs; + ValidSwizzle.clear(); + for (unsigned i = 0, e = IG.size(); i < e; ++i) { + IGSrcs.push_back(ExtractSrcs(IG[i], PV)); + unsigned Op = getOperandIdx(IG[i]->getOpcode(), + R600Operands::BANK_SWIZZLE); + ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) + IG[i]->getOperand(Op).getImm()); + } + bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle); + if (!Result) + return false; + return true; +} + + bool R600InstrInfo::fitsConstReadLimitations(const std::vector &Consts) const { @@ -197,34 +347,22 @@ bool R600InstrInfo::canBundle(const std::vector &MIs) const { std::vector Consts; for (unsigned i = 0, n = MIs.size(); i < n; i++) { - const MachineInstr *MI = MIs[i]; - - const R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - + MachineInstr *MI = MIs[i]; if (!isALUInstr(MI->getOpcode())) continue; - for (unsigned j = 0; j < 3; j++) { - int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); - if (SrcIdx < 0) - break; - unsigned Reg = MI->getOperand(SrcIdx).getReg(); - if (Reg == AMDGPU::ALU_CONST) { - unsigned Const = MI->getOperand( - getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); - Consts.push_back(Const); - continue; - } - if (AMDGPU::R600_KC0RegClass.contains(Reg) || - AMDGPU::R600_KC1RegClass.contains(Reg)) { - unsigned Index = RI.getEncodingValue(Reg) & 0xff; - unsigned Chan = RI.getHWRegChan(Reg); + const SmallVector, 3> &Srcs = + getSrcs(MI); + + for (unsigned j = 0, e = Srcs.size(); j < e; j++) { + std::pair Src = Srcs[j]; + if (Src.first->getReg() == AMDGPU::ALU_CONST) + Consts.push_back(Src.second); + if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || + AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { + unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; + unsigned Chan = RI.getHWRegChan(Src.first->getReg()); Consts.push_back((Index << 2) | Chan); - continue; } } } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index babe4b8fe51..5a84cd5a496 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -36,8 +36,19 @@ namespace llvm { const AMDGPUSubtarget &ST; int getBranchInstr(const MachineOperand &op) const; + std::vector > + ExtractSrcs(MachineInstr *MI, const DenseMap &PV) const; public: + enum BankSwizzle { + ALU_VEC_012 = 0, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + explicit R600InstrInfo(AMDGPUTargetMachine &tm); const R600RegisterInfo &getRegisterInfo() const; @@ -62,6 +73,23 @@ namespace llvm { bool usesTextureCache(unsigned Opcode) const; bool usesTextureCache(const MachineInstr *MI) const; + /// \returns a pair for each src of an ALU instructions. + /// The first member of a pair is the register id. + /// If register is ALU_CONST, second member is SEL. + /// If register is ALU_LITERAL, second member is IMM. + /// Otherwise, second member value is undefined. + SmallVector, 3> + getSrcs(MachineInstr *MI) const; + + /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210 + /// returns true and the first (in lexical order) BankSwizzle affectation + /// starting from the one already provided in the Instruction Group MIs that + /// fits Read Port limitations in BS if available. Otherwise returns false + /// and undefined content in BS. + /// PV holds GPR to PV registers in the Instruction Group MIs. + bool fitsReadPortLimitations(const std::vector &MIs, + const DenseMap &PV, + std::vector &BS) const; bool fitsConstReadLimitations(const std::vector&) const; bool canBundle(const std::vector &) const; diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index cd7b7d0b219..3e6504ddf86 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -60,15 +60,6 @@ private: const R600InstrInfo *TII; const R600RegisterInfo &TRI; - enum BankSwizzle { - ALU_VEC_012 = 0, - ALU_VEC_021, - ALU_VEC_120, - ALU_VEC_102, - ALU_VEC_201, - ALU_VEC_210 - }; - unsigned getSlot(const MachineInstr *MI) const { return TRI.getHWRegChan(MI->getOperand(0).getReg()); } @@ -222,7 +213,9 @@ public: }); const DenseMap &PV = getPreviousVector(CurrentPacketMIs.front()); - bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV); + std::vector BS; + bool FitsReadPortLimits = + TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS); DEBUG( if (!FitsReadPortLimits) { dbgs() << "Couldn't pack :\n"; @@ -235,6 +228,14 @@ public: dbgs() << "because of Read port limitations\n"; }); bool isBundlable = FitsConstLimits && FitsReadPortLimits; + if (isBundlable) { + for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { + MachineInstr *MI = CurrentPacketMIs[i]; + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + R600Operands::BANK_SWIZZLE); + MI->getOperand(Op).setImm(BS[i]); + } + } CurrentPacketMIs.pop_back(); if (!isBundlable) { endPacket(MI->getParent(), MI); @@ -246,134 +247,6 @@ public: substitutePV(MI, PV); return VLIWPacketizerList::addToPacket(MI); } -private: - std::vector > - ExtractSrcs(const MachineInstr *MI, const DenseMap &PV) - const { - R600Operands::Ops Ops[] = { - R600Operands::SRC0, - R600Operands::SRC1, - R600Operands::SRC2 - }; - std::vector > Result; - for (unsigned i = 0; i < 3; i++) { - int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); - if (OperandIdx < 0){ - Result.push_back(std::pair(-1,0)); - continue; - } - unsigned Src = MI->getOperand(OperandIdx).getReg(); - if (PV.find(Src) != PV.end()) { - Result.push_back(std::pair(-1,0)); - continue; - } - unsigned Reg = TRI.getEncodingValue(Src) & 0xff; - if (Reg > 127) { - Result.push_back(std::pair(-1,0)); - continue; - } - unsigned Chan = TRI.getHWRegChan(Src); - Result.push_back(std::pair(Reg, Chan)); - } - return Result; - } - - std::vector > - Swizzle(std::vector > Src, - BankSwizzle Swz) const { - switch (Swz) { - case ALU_VEC_012: - break; - case ALU_VEC_021: - std::swap(Src[1], Src[2]); - break; - case ALU_VEC_102: - std::swap(Src[0], Src[1]); - break; - case ALU_VEC_120: - std::swap(Src[0], Src[1]); - std::swap(Src[0], Src[2]); - break; - case ALU_VEC_201: - std::swap(Src[0], Src[2]); - std::swap(Src[0], Src[1]); - break; - case ALU_VEC_210: - std::swap(Src[0], Src[2]); - break; - } - return Src; - } - - bool isLegal(const std::vector &IG, - const std::vector &Swz, - const DenseMap &PV) const { - assert (Swz.size() == IG.size()); - int Vector[4][3]; - memset(Vector, -1, sizeof(Vector)); - for (unsigned i = 0, e = IG.size(); i < e; i++) { - const std::vector > &Srcs = - Swizzle(ExtractSrcs(IG[i], PV), Swz[i]); - for (unsigned j = 0; j < 3; j++) { - const std::pair &Src = Srcs[j]; - if (Src.first < 0) - continue; - if (Vector[Src.second][j] < 0) - Vector[Src.second][j] = Src.first; - if (Vector[Src.second][j] != Src.first) - return false; - } - } - return true; - } - - bool recursiveFitsFPLimitation( - std::vector IG, - const DenseMap &PV, - std::vector &SwzCandidate, - std::vector CurrentlyChecked) - const { - if (!isLegal(CurrentlyChecked, SwzCandidate, PV)) - return false; - if (IG.size() == CurrentlyChecked.size()) { - return true; - } - BankSwizzle AvailableSwizzle[] = { - ALU_VEC_012, - ALU_VEC_021, - ALU_VEC_120, - ALU_VEC_102, - ALU_VEC_201, - ALU_VEC_210 - }; - CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]); - for (unsigned i = 0; i < 6; i++) { - SwzCandidate.push_back(AvailableSwizzle[i]); - if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked)) - return true; - SwzCandidate.pop_back(); - } - return false; - } - - bool fitsReadPortLimitation( - std::vector IG, - const DenseMap &PV) - const { - //Todo : support shared src0 - src1 operand - std::vector SwzCandidate; - bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate, - std::vector()); - if (!Result) - return false; - for (unsigned i = 0, e = IG.size(); i < e; i++) { - MachineInstr *MI = IG[i]; - unsigned Op = TII->getOperandIdx(MI->getOpcode(), - R600Operands::BANK_SWIZZLE); - MI->getOperand(Op).setImm(SwzCandidate[i]); - } - return true; - } }; bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {