X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FSSEDomainFix.cpp;h=13680c592e01b274dbfca95729fa68147ba43598;hb=c146c4d47a7ec54c14e730c30bea821c34dc4c48;hp=419c675190ab47c853e0b4940c0e0cea70f353fb;hpb=c75c5fa12582956fc6b7d7d756b2bdd49fa61f71;p=oota-llvm.git diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 419c675190a..13680c592e0 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -21,21 +21,101 @@ #define DEBUG_TYPE "sse-domain-fix" #include "X86InstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one of more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact the the register is now available in multiple +/// domains. +namespace { +struct DomainValue { + // Basic reference counting. + unsigned Refs; + + // Bitmask of available domains. For an open DomainValue, it is the still + // possible domains for collapsing. For a collapsed DomainValue it is the + // domains where the register is available for free. + unsigned AvailableDomains; + + // Position of the last defining instruction. + unsigned Dist; + + // Twiddleable instructions using or defining these registers. + SmallVector Instrs; + + // A collapsed DomainValue has no instructions to twiddle - it simply keeps + // track of the domains where the registers are already available. + bool isCollapsed() const { return Instrs.empty(); } + + // Is domain available? + bool hasDomain(unsigned domain) const { + return AvailableDomains & (1u << domain); + } + + // Mark domain as available. + void addDomain(unsigned domain) { + AvailableDomains |= 1u << domain; + } + + // Restrict to a single domain available. + void setSingleDomain(unsigned domain) { + AvailableDomains = 1u << domain; + } + + // Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + // First domain available. + unsigned getFirstDomain() const { + return CountTrailingZeros_32(AvailableDomains); + } + + DomainValue() { clear(); } + + void clear() { + Refs = AvailableDomains = Dist = 0; + Instrs.clear(); + } +}; +} + +static const unsigned NumRegs = 16; + namespace { class SSEDomainFixPass : public MachineFunctionPass { static char ID; - const X86InstrInfo *TII; + SpecificBumpPtrAllocator Allocator; + SmallVector Avail; MachineFunction *MF; + const X86InstrInfo *TII; + const TargetRegisterInfo *TRI; MachineBasicBlock *MBB; + DomainValue **LiveRegs; + typedef DenseMap LiveOutMap; + LiveOutMap LiveOuts; + unsigned Distance; + public: - SSEDomainFixPass() : MachineFunctionPass(&ID) {} + SSEDomainFixPass() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -49,488 +129,378 @@ public: } private: - void enterBasicBlock(MachineBasicBlock *MBB); + // Register mapping. + int RegIndex(unsigned Reg); + + // DomainValue allocation. + DomainValue *Alloc(int domain = -1); + void Recycle(DomainValue*); + + // LiveRegs manipulations. + void SetLiveReg(int rx, DomainValue *DV); + void Kill(int rx); + void Force(int rx, unsigned domain); + void Collapse(DomainValue *dv, unsigned domain); + bool Merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(); + void visitGenericInstr(MachineInstr*); + void visitSoftInstr(MachineInstr*, unsigned mask); + void visitHardInstr(MachineInstr*, unsigned domain); }; } -void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) { - MBB = mbb; - DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n"); +char SSEDomainFixPass::ID = 0; + +/// Translate TRI register number to an index into our smaller tables of +/// interesting registers. Return -1 for boring registers. +int SSEDomainFixPass::RegIndex(unsigned reg) { + assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); + reg -= X86::XMM0; + return reg < NumRegs ? (int) reg : -1; +} + +DomainValue *SSEDomainFixPass::Alloc(int domain) { + DomainValue *dv = Avail.empty() ? + new(Allocator.Allocate()) DomainValue : + Avail.pop_back_val(); + dv->Dist = Distance; + if (domain >= 0) + dv->addDomain(domain); + return dv; +} + +void SSEDomainFixPass::Recycle(DomainValue *dv) { + assert(dv && "Cannot recycle NULL"); + dv->clear(); + Avail.push_back(dv); +} + +/// Set LiveRegs[rx] = dv, updating reference counts. +void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + if (!LiveRegs) { + LiveRegs = new DomainValue*[NumRegs]; + std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); + } + + if (LiveRegs[rx] == dv) + return; + if (LiveRegs[rx]) { + assert(LiveRegs[rx]->Refs && "Bad refcount"); + if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); + } + LiveRegs[rx] = dv; + if (dv) ++dv->Refs; +} + +// Kill register rx, recycle or collapse any DomainValue. +void SSEDomainFixPass::Kill(int rx) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + if (!LiveRegs || !LiveRegs[rx]) return; + + // Before killing the last reference to an open DomainValue, collapse it to + // the first available domain. + if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) + Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); + else + SetLiveReg(rx, 0); +} + +/// Force register rx into domain. +void SSEDomainFixPass::Force(int rx, unsigned domain) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + DomainValue *dv; + if (LiveRegs && (dv = LiveRegs[rx])) { + if (dv->isCollapsed()) + dv->addDomain(domain); + else if (dv->hasDomain(domain)) + Collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and force + // the new value into domain. This costs a domain crossing. + Collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } + } else { + // Set up basic collapsed DomainValue. + SetLiveReg(rx, Alloc(domain)); + } +} + +/// Collapse open DomainValue into given domain. If there are multiple +/// registers using dv, they each get a unique collapsed DomainValue. +void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { + assert(dv->hasDomain(domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) + TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain); + dv->setSingleDomain(domain); + + // If there are multiple users, give them new, unique DomainValues. + if (LiveRegs && dv->Refs > 1) + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == dv) + SetLiveReg(rx, Alloc(domain)); +} + +/// Merge - All instructions and registers in B are moved to A, and B is +/// released. +bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { + assert(!A->isCollapsed() && "Cannot merge into collapsed"); + assert(!B->isCollapsed() && "Cannot merge from collapsed"); + if (A == B) + return true; + // Restrict to the domains that A and B have in common. + unsigned common = A->getCommonDomains(B->AvailableDomains); + if (!common) + return false; + A->AvailableDomains = common; + A->Dist = std::max(A->Dist, B->Dist); + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == B) + SetLiveReg(rx, A); + return true; +} + +void SSEDomainFixPass::enterBasicBlock() { + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), + e = MBB->livein_end(); i != e; ++i) { + int rx = RegIndex(*i); + if (rx < 0) continue; + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) continue; + DomainValue *pdv = fi->second[rx]; + if (!pdv) continue; + if (!LiveRegs || !LiveRegs[rx]) { + SetLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx]->isCollapsed()) { + // We are already collapsed, but predecessor is not. Force him. + unsigned domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(domain)) + Collapse(pdv, domain); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->isCollapsed()) + Merge(LiveRegs[rx], pdv); + else + Force(rx, pdv->getFirstDomain()); + } + } +} + +// A hard instruction only works in one domain. All input registers will be +// forced into that domain. +void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Force(rx, domain); + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + Force(rx, domain); + } +} + +// A soft instruction can be changed to work in other domains given by mask. +void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Bitmask of available domains for this instruction after taking collapsed + // operands into account. + unsigned available = mask; + + // Scan the explicit use operands for incoming domains. + SmallVector used; + if (LiveRegs) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (DomainValue *dv = LiveRegs[rx]) { + // Bitmask of domains that dv and available have in common. + unsigned common = dv->getCommonDomains(available); + // Is it possible to use this collapsed register for free? + if (dv->isCollapsed()) { + // Restrict available domains to the ones in common with the operand. + // If there are no common domains, we must pay the cross-domain + // penalty for this operand. + if (common) available = common; + } else if (common) + // Open DomainValue is compatible, save it for merging. + used.push_back(rx); + else + // Open DomainValue is not compatible with instruction. It is useless + // now. + Kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. + if (isPowerOf2_32(available)) { + unsigned domain = CountTrailingZeros_32(available); + TII->SetSSEDomain(mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match available, and build a list of + // incoming DomainValues that we want to merge. + SmallVector doms; + for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + int rx = *i; + DomainValue *dv = LiveRegs[rx]; + // This useless DomainValue could have been missed above. + if (!dv->getCommonDomains(available)) { + Kill(*i); + continue; + } + // sorted, uniqued insert. + bool inserted = false; + for (SmallVector::iterator i = doms.begin(), e = doms.end(); + i != e && !inserted; ++i) { + if (dv == *i) + inserted = true; + else if (dv->Dist < (*i)->Dist) { + inserted = true; + doms.insert(i, dv); + } + } + if (!inserted) + doms.push_back(dv); + } + + // doms are now sorted in order of appearance. Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = 0; + while (!doms.empty()) { + if (!dv) { + dv = doms.pop_back_val(); + continue; + } + + DomainValue *latest = doms.pop_back_val(); + if (Merge(dv, latest)) continue; + + // If latest didn't merge, it is useless now. Kill all registers using it. + for (SmallVector::iterator i=used.begin(), e=used.end(); i != e; ++i) + if (LiveRegs[*i] == latest) + Kill(*i); + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) + dv = Alloc(); + dv->Dist = Distance; + dv->AvailableDomains = available; + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. + for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { + Kill(rx); + SetLiveReg(rx, dv); + } + } +} + +void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { + // Process explicit defs, kill any XMM registers redefined. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + } } bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = static_cast(MF->getTarget().getInstrInfo()); + TRI = MF->getTarget().getRegisterInfo(); + MBB = 0; + LiveRegs = 0; + Distance = 0; + assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); + + // If no XMM registers are used in the function, we can skip it completely. + bool anyregs = false; + for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), + E = X86::VR128RegClass.end(); I != E; ++I) + if (MF->getRegInfo().isPhysRegUsed(*I)) { + anyregs = true; + break; + } + if (!anyregs) return false; MachineBasicBlock *Entry = MF->begin(); SmallPtrSet Visited; - for (df_ext_iterator > + for (df_ext_iterator > DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - enterBasicBlock(*DFI); + DFI != DFE; ++DFI) { + MBB = *DFI; + enterBasicBlock(); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - MachineInstr *MI = I; - const unsigned *equiv = 0; - X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv); - DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : " ") << *MI); + MachineInstr *mi = I; + if (mi->isDebugValue()) continue; + ++Distance; + std::pair domp = TII->GetSSEDomain(mi); + if (domp.first) + if (domp.second) + visitSoftInstr(mi, domp.second); + else + visitHardInstr(mi, domp.first); + else if (LiveRegs) + visitGenericInstr(mi); } + + // Save live registers at end of MBB - used by enterBasicBlock(). + if (LiveRegs) + LiveOuts.insert(std::make_pair(MBB, LiveRegs)); + LiveRegs = 0; } + + // Clear the LiveOuts vectors. Should we also collapse any remaining + // DomainValues? + for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); + i != e; ++i) + delete[] i->second; + LiveOuts.clear(); + Avail.clear(); + Allocator.DestroyAll(); + return false; } FunctionPass *llvm::createSSEDomainFixPass() { return new SSEDomainFixPass(); } - -// These are the replaceable instructions. Some of these have _Int variants -// that we don't include here. We don't want to replace instructions selected -// by intrinsics. -static const unsigned ReplaceableInstrs[][3] = { - //PackedInt PackedSingle PackedDouble - { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr }, - { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm }, - { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr }, - { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr }, - { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm }, - { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr }, - { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm }, - { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr }, - { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm }, - { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr }, - { X86::PORrm, X86::ORPSrm, X86::ORPDrm }, - { X86::PORrr, X86::ORPSrr, X86::ORPDrr }, - { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm }, - { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr }, - { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm }, - { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr }, - { X86::PXORrm, X86::XORPSrm, X86::XORPDrm }, - { X86::PXORrr, X86::XORPSrr, X86::XORPDrr }, -}; - -void X86InstrInfo::populateSSEInstrDomainTable() { - // Instructions that execute in the packed integer domain. - static const unsigned PackedIntInstrs[] = { - X86::LDDQUrm, - X86::MASKMOVDQU, - X86::MASKMOVDQU64, - X86::MOVDI2PDIrm, - X86::MOVDI2PDIrr, - X86::MOVDQUmr_Int, - X86::MOVDQUrm_Int, - X86::MOVLQ128mr, - X86::MOVNTDQArm, - X86::MOVNTDQmr_Int, - X86::MOVNTDQ_64mr, - X86::MOVPDI2DImr, - X86::MOVPDI2DIrr, - X86::MOVPQI2QImr, - X86::MOVPQIto64rr, - X86::MOVQI2PQIrm, - X86::MOVQxrxr, - X86::MOVZDI2PDIrm, - X86::MOVZDI2PDIrr, - X86::MOVZPQILo2PQIrm, - X86::MOVZPQILo2PQIrr, - X86::MOVZQI2PQIrm, - X86::MOVZQI2PQIrr, - X86::MPSADBWrmi, - X86::MPSADBWrri, - X86::PABSBrm128, - X86::PABSBrr128, - X86::PABSDrm128, - X86::PABSDrr128, - X86::PABSWrm128, - X86::PABSWrr128, - X86::PACKSSDWrm, - X86::PACKSSDWrr, - X86::PACKSSWBrm, - X86::PACKSSWBrr, - X86::PACKUSDWrm, - X86::PACKUSDWrr, - X86::PACKUSWBrm, - X86::PACKUSWBrr, - X86::PADDBrm, - X86::PADDBrr, - X86::PADDDrm, - X86::PADDDrr, - X86::PADDQrm, - X86::PADDQrr, - X86::PADDSBrm, - X86::PADDSBrr, - X86::PADDSWrm, - X86::PADDSWrr, - X86::PADDUSBrm, - X86::PADDUSBrr, - X86::PADDUSWrm, - X86::PADDUSWrr, - X86::PADDWrm, - X86::PADDWrr, - X86::PALIGNR128rm, - X86::PALIGNR128rr, - X86::PAVGBrm, - X86::PAVGBrr, - X86::PAVGWrm, - X86::PAVGWrr, - X86::PBLENDVBrm0, - X86::PBLENDVBrr0, - X86::PBLENDWrmi, - X86::PBLENDWrri, - X86::PCMPEQBrm, - X86::PCMPEQBrr, - X86::PCMPEQDrm, - X86::PCMPEQDrr, - X86::PCMPEQQrm, - X86::PCMPEQQrr, - X86::PCMPEQWrm, - X86::PCMPEQWrr, - X86::PCMPESTRIArm, - X86::PCMPESTRIArr, - X86::PCMPESTRICrm, - X86::PCMPESTRICrr, - X86::PCMPESTRIOrm, - X86::PCMPESTRIOrr, - X86::PCMPESTRIrm, - X86::PCMPESTRIrr, - X86::PCMPESTRISrm, - X86::PCMPESTRISrr, - X86::PCMPESTRIZrm, - X86::PCMPESTRIZrr, - X86::PCMPESTRM128MEM, - X86::PCMPESTRM128REG, - X86::PCMPESTRM128rm, - X86::PCMPESTRM128rr, - X86::PCMPGTBrm, - X86::PCMPGTBrr, - X86::PCMPGTDrm, - X86::PCMPGTDrr, - X86::PCMPGTQrm, - X86::PCMPGTQrr, - X86::PCMPGTWrm, - X86::PCMPGTWrr, - X86::PCMPISTRIArm, - X86::PCMPISTRIArr, - X86::PCMPISTRICrm, - X86::PCMPISTRICrr, - X86::PCMPISTRIOrm, - X86::PCMPISTRIOrr, - X86::PCMPISTRIrm, - X86::PCMPISTRIrr, - X86::PCMPISTRISrm, - X86::PCMPISTRISrr, - X86::PCMPISTRIZrm, - X86::PCMPISTRIZrr, - X86::PCMPISTRM128MEM, - X86::PCMPISTRM128REG, - X86::PCMPISTRM128rm, - X86::PCMPISTRM128rr, - X86::PEXTRBmr, - X86::PEXTRBrr, - X86::PEXTRDmr, - X86::PEXTRDrr, - X86::PEXTRQmr, - X86::PEXTRQrr, - X86::PEXTRWmr, - X86::PEXTRWri, - X86::PHADDDrm128, - X86::PHADDDrr128, - X86::PHADDSWrm128, - X86::PHADDSWrr128, - X86::PHADDWrm128, - X86::PHADDWrr128, - X86::PHMINPOSUWrm128, - X86::PHMINPOSUWrr128, - X86::PHSUBDrm128, - X86::PHSUBDrr128, - X86::PHSUBSWrm128, - X86::PHSUBSWrr128, - X86::PHSUBWrm128, - X86::PHSUBWrr128, - X86::PINSRBrm, - X86::PINSRBrr, - X86::PINSRDrm, - X86::PINSRDrr, - X86::PINSRQrm, - X86::PINSRQrr, - X86::PINSRWrmi, - X86::PINSRWrri, - X86::PMADDUBSWrm128, - X86::PMADDUBSWrr128, - X86::PMADDWDrm, - X86::PMADDWDrr, - X86::PMAXSBrm, - X86::PMAXSBrr, - X86::PMAXSDrm, - X86::PMAXSDrr, - X86::PMAXSWrm, - X86::PMAXSWrr, - X86::PMAXUBrm, - X86::PMAXUBrr, - X86::PMAXUDrm, - X86::PMAXUDrr, - X86::PMAXUWrm, - X86::PMAXUWrr, - X86::PMINSBrm, - X86::PMINSBrr, - X86::PMINSDrm, - X86::PMINSDrr, - X86::PMINSWrm, - X86::PMINSWrr, - X86::PMINUBrm, - X86::PMINUBrr, - X86::PMINUDrm, - X86::PMINUDrr, - X86::PMINUWrm, - X86::PMINUWrr, - X86::PMOVSXBDrm, - X86::PMOVSXBDrr, - X86::PMOVSXBQrm, - X86::PMOVSXBQrr, - X86::PMOVSXBWrm, - X86::PMOVSXBWrr, - X86::PMOVSXDQrm, - X86::PMOVSXDQrr, - X86::PMOVSXWDrm, - X86::PMOVSXWDrr, - X86::PMOVSXWQrm, - X86::PMOVSXWQrr, - X86::PMOVZXBDrm, - X86::PMOVZXBDrr, - X86::PMOVZXBQrm, - X86::PMOVZXBQrr, - X86::PMOVZXBWrm, - X86::PMOVZXBWrr, - X86::PMOVZXDQrm, - X86::PMOVZXDQrr, - X86::PMOVZXWDrm, - X86::PMOVZXWDrr, - X86::PMOVZXWQrm, - X86::PMOVZXWQrr, - X86::PMULDQrm, - X86::PMULDQrr, - X86::PMULHRSWrm128, - X86::PMULHRSWrr128, - X86::PMULHUWrm, - X86::PMULHUWrr, - X86::PMULHWrm, - X86::PMULHWrr, - X86::PMULLDrm, - X86::PMULLDrm_int, - X86::PMULLDrr, - X86::PMULLDrr_int, - X86::PMULLWrm, - X86::PMULLWrr, - X86::PMULUDQrm, - X86::PMULUDQrr, - X86::PSADBWrm, - X86::PSADBWrr, - X86::PSHUFBrm128, - X86::PSHUFBrr128, - X86::PSHUFHWmi, - X86::PSHUFHWri, - X86::PSHUFLWmi, - X86::PSHUFLWri, - X86::PSIGNBrm128, - X86::PSIGNBrr128, - X86::PSIGNDrm128, - X86::PSIGNDrr128, - X86::PSIGNWrm128, - X86::PSIGNWrr128, - X86::PSLLDQri, - X86::PSLLDri, - X86::PSLLDrm, - X86::PSLLDrr, - X86::PSLLQri, - X86::PSLLQrm, - X86::PSLLQrr, - X86::PSLLWri, - X86::PSLLWrm, - X86::PSLLWrr, - X86::PSRADri, - X86::PSRADrm, - X86::PSRADrr, - X86::PSRAWri, - X86::PSRAWrm, - X86::PSRAWrr, - X86::PSRLDQri, - X86::PSRLDri, - X86::PSRLDrm, - X86::PSRLDrr, - X86::PSRLQri, - X86::PSRLQrm, - X86::PSRLQrr, - X86::PSRLWri, - X86::PSRLWrm, - X86::PSRLWrr, - X86::PSUBBrm, - X86::PSUBBrr, - X86::PSUBDrm, - X86::PSUBDrr, - X86::PSUBQrm, - X86::PSUBQrr, - X86::PSUBSBrm, - X86::PSUBSBrr, - X86::PSUBSWrm, - X86::PSUBSWrr, - X86::PSUBUSBrm, - X86::PSUBUSBrr, - X86::PSUBUSWrm, - X86::PSUBUSWrr, - X86::PSUBWrm, - X86::PSUBWrr, - X86::PUNPCKHBWrm, - X86::PUNPCKHBWrr, - X86::PUNPCKHWDrm, - X86::PUNPCKHWDrr, - X86::PUNPCKLBWrm, - X86::PUNPCKLBWrr, - X86::PUNPCKLWDrm, - X86::PUNPCKLWDrr, - }; - - // Instructions that execute in the packed single domain. - static const unsigned PackedSingleInstrs[] = { - X86::ADDPSrm, - X86::ADDPSrr, - X86::ADDSUBPSrm, - X86::ADDSUBPSrr, - X86::BLENDPSrmi, - X86::BLENDPSrri, - X86::BLENDVPSrm0, - X86::BLENDVPSrr0, - X86::CMPPSrmi, - X86::CMPPSrri, - X86::DIVPSrm, - X86::DIVPSrr, - X86::DPPSrmi, - X86::DPPSrri, - X86::EXTRACTPSmr, - X86::EXTRACTPSrr, - X86::HADDPSrm, - X86::HADDPSrr, - X86::HSUBPSrm, - X86::HSUBPSrr, - X86::INSERTPSrm, - X86::INSERTPSrr, - X86::MAXPSrm, - X86::MAXPSrm_Int, - X86::MAXPSrr, - X86::MAXPSrr_Int, - X86::MINPSrm, - X86::MINPSrm_Int, - X86::MINPSrr, - X86::MINPSrr_Int, - X86::MOVHLPSrr, - X86::MOVHPSmr, - X86::MOVHPSrm, - X86::MOVLHPSrr, - X86::MOVLPSmr, - X86::MOVLPSrm, - X86::MOVMSKPSrr, - X86::MOVNTPSmr_Int, - X86::MOVSHDUPrm, - X86::MOVSHDUPrr, - X86::MOVSLDUPrm, - X86::MOVSLDUPrr, - X86::MOVUPSmr_Int, - X86::MOVUPSrm_Int, - X86::MULPSrm, - X86::MULPSrr, - X86::RCPPSm, - X86::RCPPSm_Int, - X86::RCPPSr, - X86::RCPPSr_Int, - X86::ROUNDPSm_Int, - X86::ROUNDPSr_Int, - X86::RSQRTPSm, - X86::RSQRTPSm_Int, - X86::RSQRTPSr, - X86::RSQRTPSr_Int, - X86::SQRTPSm, - X86::SQRTPSm_Int, - X86::SQRTPSr, - X86::SQRTPSr_Int, - X86::SUBPSrm, - X86::SUBPSrr, - }; - - // Instructions that execute in the packed double domain. - static const unsigned PackedDoubleInstrs[] = { - X86::ADDPDrm, - X86::ADDPDrr, - X86::ADDSUBPDrm, - X86::ADDSUBPDrr, - X86::BLENDPDrmi, - X86::BLENDPDrri, - X86::BLENDVPDrm0, - X86::BLENDVPDrr0, - X86::CMPPDrmi, - X86::CMPPDrri, - X86::DIVPDrm, - X86::DIVPDrr, - X86::DPPDrmi, - X86::DPPDrri, - X86::HADDPDrm, - X86::HADDPDrr, - X86::HSUBPDrm, - X86::HSUBPDrr, - X86::MAXPDrm, - X86::MAXPDrm_Int, - X86::MAXPDrr, - X86::MAXPDrr_Int, - X86::MINPDrm, - X86::MINPDrm_Int, - X86::MINPDrr, - X86::MINPDrr_Int, - X86::MOVHPDmr, - X86::MOVHPDrm, - X86::MOVLPDmr, - X86::MOVLPDrm, - X86::MOVMSKPDrr, - X86::MOVNTPDmr_Int, - X86::MOVUPDmr_Int, - X86::MOVUPDrm_Int, - X86::MULPDrm, - X86::MULPDrr, - X86::ROUNDPDm_Int, - X86::ROUNDPDr_Int, - X86::SQRTPDm, - X86::SQRTPDm_Int, - X86::SQRTPDr, - X86::SQRTPDr_Int, - X86::SUBPDrm, - X86::SUBPDrr, - }; - - // Add non-negative entries for forcing instructions. - for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i) - SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i], - PackedInt)); - for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i) - SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i], - PackedSingle)); - for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i) - SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i], - PackedDouble)); - - // Add row number + 1 for replaceable instructions. - for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) - for (unsigned c = 0; c != 3; ++c) - SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c], - c + 4*(i+1))); -} - -X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI, - const unsigned *&equiv) const { - DenseMap::const_iterator i = - SSEInstrDomainTable.find(MI->getOpcode()); - if (i == SSEInstrDomainTable.end()) - return NotSSEDomain; - unsigned value = i->second; - if (value/4) - equiv = ReplaceableInstrs[value/4 - 1]; - else - equiv = 0; - return SSEDomain(value & 3); -}