#define DEBUG_TYPE "sse-domain-fix"
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+namespace {
+struct DomainValue {
+  // Reference count. SetLiveReg adjusts it as registers start and stop
+  // referring to this value.
+  unsigned Refs;
+
+  // One bit per execution domain. While the value is open, these are the
+  // domains it may still be collapsed into. Once collapsed, they are the
+  // domains in which the register is available for free.
+  unsigned AvailableDomains;
+
+  // Position of the last defining instruction.
+  unsigned Dist;
+
+  // Instructions using or defining these registers whose domain can still be
+  // switched.
+  SmallVector<MachineInstr*, 8> Instrs;
+
+  // Starts out unreferenced, with no domains and no instructions.
+  DomainValue() : Refs(0), AvailableDomains(0), Dist(0) {}
+
+  // Return to the freshly constructed state so the object can be reused.
+  void clear() {
+    Refs = 0;
+    AvailableDomains = 0;
+    Dist = 0;
+    Instrs.clear();
+  }
+
+  // A value is collapsed when there is nothing left to twiddle; it then only
+  // records where the register is already available.
+  bool isCollapsed() const { return Instrs.empty(); }
+
+  // Is the given domain's bit set?
+  bool hasDomain(unsigned domain) const {
+    return (AvailableDomains & (1u << domain)) != 0;
+  }
+
+  // Add one domain to the available set.
+  void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; }
+
+  // Replace the available set with exactly one domain.
+  void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; }
+
+  // Domains that are both available here and present in mask.
+  unsigned getCommonDomains(unsigned mask) const {
+    return AvailableDomains & mask;
+  }
+
+  // Lowest-numbered available domain.
+  unsigned getFirstDomain() const {
+    return CountTrailingZeros_32(AvailableDomains);
+  }
+};
+}
+
+static const unsigned NumRegs = 16; // Size of the per-register tables; RegIndex() maps XMM0..XMM15 onto 0..NumRegs-1.
+
namespace {
class SSEDomainFixPass : public MachineFunctionPass {
static char ID;
- const X86InstrInfo *TII;
+ SpecificBumpPtrAllocator<DomainValue> Allocator; // Storage for every DomainValue; released in bulk by DestroyAll() at the end of runOnMachineFunction.
+ SmallVector<DomainValue*,16> Avail; // Free list filled by Recycle(); Alloc() reuses these before allocating.
MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineBasicBlock *MBB;
+ DomainValue **LiveRegs; // Table of NumRegs entries for the current block; null until SetLiveReg() first creates it.
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts; // LiveRegs table saved at the end of each visited block; read by enterBasicBlock().
+ unsigned Distance; // Count of non-debug instructions visited so far; copied into DomainValue::Dist.
+
public:
- SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
private:
- void enterBasicBlock(MachineBasicBlock *MBB);
+ // Register mapping.
+ int RegIndex(unsigned Reg); // Returns -1 for registers outside XMM0..XMM15.
+
+ // DomainValue allocation.
+ DomainValue *Alloc(int domain = -1); // A negative domain leaves the new value with no available domains.
+ void Recycle(DomainValue*);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B); // Returns false when A and B share no domain.
+
+ void enterBasicBlock(); // Operates on the MBB member set by runOnMachineFunction.
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
};
}
-void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
- MBB = mbb;
- DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+char SSEDomainFixPass::ID = 0;
+
+/// Map a target register number onto a dense index in [0, NumRegs) for the
+/// registers this pass tracks (XMM0..XMM15). Every other register yields -1.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+  assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+  // Unsigned arithmetic: registers numbered below XMM0 wrap around to a huge
+  // offset, so a single range check rejects both sides.
+  const unsigned offset = reg - X86::XMM0;
+  if (offset >= NumRegs)
+    return -1;
+  return (int) offset;
+}
+
+/// Hand out a cleared DomainValue stamped with the current Distance. A
+/// recycled object is preferred; otherwise a new one is carved out of
+/// Allocator. When domain is non-negative it becomes the value's only
+/// available domain; otherwise the value starts with none.
+DomainValue *SSEDomainFixPass::Alloc(int domain) {
+  DomainValue *dv;
+  if (Avail.empty())
+    dv = new(Allocator.Allocate()) DomainValue;
+  else
+    dv = Avail.pop_back_val();
+
+  dv->Dist = Distance;
+  if (domain >= 0)
+    dv->addDomain(domain);
+  return dv;
+}
+
+/// Put an unreferenced DomainValue on the free list for reuse by Alloc().
+void SSEDomainFixPass::Recycle(DomainValue *value) {
+  assert(value && "Cannot recycle NULL");
+  // Wipe it now so Alloc() can hand it out without further initialization.
+  value->clear();
+  Avail.push_back(value);
+}
+
+/// Point LiveRegs[rx] at dv (which may be null) and keep reference counts in
+/// step: the previous occupant loses a reference and is recycled when that
+/// was its last one; dv gains a reference.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+
+  // The table is created on first use, with every slot empty.
+  if (!LiveRegs) {
+    LiveRegs = new DomainValue*[NumRegs];
+    for (unsigned slot = 0; slot != NumRegs; ++slot)
+      LiveRegs[slot] = 0;
+  }
+
+  DomainValue *previous = LiveRegs[rx];
+  if (previous == dv)
+    return;
+
+  if (previous) {
+    assert(previous->Refs && "Bad refcount");
+    --previous->Refs;
+    if (previous->Refs == 0)
+      Recycle(previous);
+  }
+
+  LiveRegs[rx] = dv;
+  if (dv)
+    ++dv->Refs;
+}
+
+// Kill register rx. An open DomainValue about to lose its last reference is
+// collapsed to its first available domain; in every other case the register
+// simply drops its reference.
+void SSEDomainFixPass::Kill(int rx) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  if (!LiveRegs)
+    return;
+  DomainValue *dv = LiveRegs[rx];
+  if (!dv)
+    return;
+
+  const bool lastOpenRef = dv->Refs == 1 && !dv->isCollapsed();
+  if (!lastOpenRef) {
+    SetLiveReg(rx, 0);
+    return;
+  }
+
+  // With a single reference Collapse() does not reassign LiveRegs[rx], so the
+  // register keeps pointing at the now-collapsed value.
+  Collapse(dv, dv->getFirstDomain());
+}
+
+/// Force register rx into domain.
+///
+/// - No value yet: create a collapsed value available only in domain.
+/// - Already collapsed: record domain as additionally available.
+/// - Open and compatible: collapse it into domain.
+/// - Open and incompatible: collapse it into its first domain, then mark
+///   domain as available too. This costs a domain crossing.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+
+  DomainValue *dv = LiveRegs ? LiveRegs[rx] : 0;
+  if (!dv) {
+    // Set up basic collapsed DomainValue.
+    SetLiveReg(rx, Alloc(domain));
+    return;
+  }
+
+  if (dv->isCollapsed()) {
+    dv->addDomain(domain);
+    return;
+  }
+
+  if (dv->hasDomain(domain)) {
+    Collapse(dv, domain);
+    return;
+  }
+
+  // Collapse() may have given rx a fresh value, so go back through LiveRegs
+  // rather than reusing dv.
+  Collapse(dv, dv->getFirstDomain());
+  assert(LiveRegs[rx] && "Not live after collapse?");
+  LiveRegs[rx]->addDomain(domain);
+}
+
+/// Collapse open DomainValue dv into the given domain: every instruction it
+/// holds is switched to that domain and dv is left with that single domain
+/// available. If more than one reference exists, each register in LiveRegs
+/// that uses dv receives its own new collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+  assert(dv->hasDomain(domain) && "Cannot collapse");
+
+  // Drain the instruction list from the back, switching each instruction.
+  while (!dv->Instrs.empty()) {
+    MachineInstr *mi = dv->Instrs.pop_back_val();
+    TII->SetSSEDomain(mi, domain);
+  }
+  dv->setSingleDomain(domain);
+
+  // A sole user keeps dv itself.
+  if (!LiveRegs || dv->Refs <= 1)
+    return;
+
+  for (unsigned rx = 0; rx != NumRegs; ++rx) {
+    if (LiveRegs[rx] != dv)
+      continue;
+    SetLiveReg(rx, Alloc(domain));
+  }
+}
+
+/// Merge - Fold open DomainValue B into open DomainValue A. A is restricted
+/// to the domains both have in common, takes the later of the two Dist
+/// values, and receives B's instructions; every LiveRegs entry that used B
+/// is redirected to A. Returns false, changing nothing, when A and B have no
+/// domain in common. Merging a value with itself trivially succeeds.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+  assert(!A->isCollapsed() && "Cannot merge into collapsed");
+  assert(!B->isCollapsed() && "Cannot merge from collapsed");
+  if (A == B)
+    return true;
+
+  const unsigned shared = A->AvailableDomains & B->AvailableDomains;
+  if (shared == 0)
+    return false;
+
+  A->AvailableDomains = shared;
+  if (A->Dist < B->Dist)
+    A->Dist = B->Dist;
+  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+  // SetLiveReg() drops B's reference count as registers move over and
+  // recycles B if the count reaches zero.
+  for (unsigned rx = 0; rx != NumRegs; ++rx) {
+    if (LiveRegs[rx] != B)
+      continue;
+    SetLiveReg(rx, A);
+  }
+  return true;
+}
+
+/// Seed LiveRegs for the current block (MBB) from the live-out tables of its
+/// predecessors. Only tracked live-in registers are considered, and
+/// predecessors with no saved table are skipped.
+void SSEDomainFixPass::enterBasicBlock() {
+  typedef MachineBasicBlock::livein_iterator LiveInIter;
+  typedef MachineBasicBlock::const_pred_iterator PredIter;
+
+  // Try to coalesce live-out registers from predecessors.
+  for (LiveInIter li = MBB->livein_begin(), le = MBB->livein_end();
+       li != le; ++li) {
+    const int rx = RegIndex(*li);
+    if (rx < 0)
+      continue;
+
+    for (PredIter pi = MBB->pred_begin(), pe = MBB->pred_end();
+         pi != pe; ++pi) {
+      LiveOutMap::const_iterator found = LiveOuts.find(*pi);
+      if (found == LiveOuts.end())
+        continue;
+      DomainValue *pdv = found->second[rx];
+      if (!pdv)
+        continue;
+
+      // First predecessor with a value for rx: adopt it as is.
+      DomainValue *current = LiveRegs ? LiveRegs[rx] : 0;
+      if (!current) {
+        SetLiveReg(rx, pdv);
+        continue;
+      }
+
+      // From here on we have a live DomainValue from more than one
+      // predecessor.
+      if (current->isCollapsed()) {
+        // We are already collapsed. If the predecessor is still open and
+        // compatible, force it into our first domain.
+        const unsigned domain = current->getFirstDomain();
+        if (!pdv->isCollapsed() && pdv->hasDomain(domain))
+          Collapse(pdv, domain);
+      } else if (pdv->isCollapsed()) {
+        // We are open but the predecessor is not: follow its first domain.
+        Force(rx, pdv->getFirstDomain());
+      } else {
+        // Both open: merge the predecessor into ours. A failed merge is
+        // ignored.
+        Merge(current, pdv);
+      }
+    }
+  }
+}
+
+// A hard instruction only works in one domain. Every tracked register among
+// its use operands is forced into that domain first; then every tracked
+// register it defines is killed and re-created in that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+  const unsigned numDefs = mi->getDesc().getNumDefs();
+  const unsigned numOps = mi->getDesc().getNumOperands();
+
+  // Uses: the operands that follow the defs.
+  for (unsigned op = numDefs; op != numOps; ++op) {
+    MachineOperand &mo = mi->getOperand(op);
+    if (mo.isReg()) {
+      int rx = RegIndex(mo.getReg());
+      if (rx >= 0)
+        Force(rx, domain);
+    }
+  }
+
+  // Defs: the leading operands.
+  for (unsigned op = 0; op != numDefs; ++op) {
+    MachineOperand &mo = mi->getOperand(op);
+    if (mo.isReg()) {
+      int rx = RegIndex(mo.getReg());
+      if (rx >= 0) {
+        Kill(rx);
+        Force(rx, domain);
+      }
+    }
+  }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+//
+// Collapsed operands narrow the set of domains the instruction can use for
+// free. If exactly one domain is left, the instruction is switched to it and
+// treated as a hard instruction. Otherwise the compatible open DomainValues
+// feeding it are merged, the instruction is added to the result, and its
+// registers are pointed at that DomainValue.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+  // Bitmask of available domains for this instruction after taking collapsed
+  // operands into account.
+  unsigned available = mask;
+
+  // Scan the explicit use operands for incoming domains.
+  SmallVector<int, 4> used;
+  if (LiveRegs)
+    for (unsigned i = mi->getDesc().getNumDefs(),
+                  e = mi->getDesc().getNumOperands(); i != e; ++i) {
+      MachineOperand &mo = mi->getOperand(i);
+      if (!mo.isReg()) continue;
+      int rx = RegIndex(mo.getReg());
+      if (rx < 0) continue;
+      if (DomainValue *dv = LiveRegs[rx]) {
+        // Bitmask of domains that dv and available have in common.
+        unsigned common = dv->getCommonDomains(available);
+        // Is it possible to use this collapsed register for free?
+        if (dv->isCollapsed()) {
+          // Restrict available domains to the ones in common with the operand.
+          // If there are no common domains, we must pay the cross-domain
+          // penalty for this operand.
+          if (common) available = common;
+        } else if (common)
+          // Open DomainValue is compatible, save it for merging.
+          used.push_back(rx);
+        else
+          // Open DomainValue is not compatible with instruction. It is useless
+          // now.
+          Kill(rx);
+      }
+    }
+
+  // If the collapsed operands force a single domain, propagate the collapse.
+  if (isPowerOf2_32(available)) {
+    unsigned domain = CountTrailingZeros_32(available);
+    TII->SetSSEDomain(mi, domain);
+    visitHardInstr(mi, domain);
+    return;
+  }
+
+  // Kill off any remaining uses that don't match available, and build a list of
+  // incoming DomainValues that we want to merge.
+  SmallVector<DomainValue*,4> doms;
+  for (SmallVector<int, 4>::iterator ui = used.begin(), ue = used.end();
+       ui != ue; ++ui) {
+    int rx = *ui;
+    DomainValue *dv = LiveRegs[rx];
+    // This useless DomainValue could have been missed above, because
+    // available may have been narrowed by a later operand.
+    if (!dv->getCommonDomains(available)) {
+      Kill(rx);
+      continue;
+    }
+    // Sorted, uniqued insert: doms stays ordered by increasing Dist. Find the
+    // position first and insert once, so no iterator is touched after
+    // insert() has possibly reallocated the vector.
+    SmallVector<DomainValue*,4>::iterator pos = doms.begin();
+    SmallVector<DomainValue*,4>::iterator end = doms.end();
+    while (pos != end && *pos != dv && (*pos)->Dist <= dv->Dist)
+      ++pos;
+    if (pos == end || *pos != dv)
+      doms.insert(pos, dv);
+  }
+
+  // doms are now sorted in order of appearance. Try to merge them all, giving
+  // priority to the latest ones.
+  DomainValue *dv = 0;
+  while (!doms.empty()) {
+    DomainValue *latest = doms.pop_back_val();
+    if (!dv) {
+      dv = latest;
+      // Narrow the chosen value to what this instruction can use. Its domain
+      // set must only ever shrink; overwriting it with available could
+      // re-admit domains that earlier merges had already ruled out.
+      dv->AvailableDomains = dv->getCommonDomains(available);
+      assert(dv->AvailableDomains && "Domain should have been filtered");
+      continue;
+    }
+
+    if (Merge(dv, latest)) continue;
+
+    // If latest didn't merge, it is useless now. Kill all registers using it.
+    for (SmallVector<int,4>::iterator ui = used.begin(), ue = used.end();
+         ui != ue; ++ui)
+      if (LiveRegs[*ui] == latest)
+        Kill(*ui);
+  }
+
+  // dv is the DomainValue we are going to use for this instruction. A fresh
+  // one starts with exactly the domains this instruction allows.
+  if (!dv) {
+    dv = Alloc();
+    dv->AvailableDomains = available;
+  }
+  dv->Dist = Distance;
+  dv->Instrs.push_back(mi);
+
+  // Finally set all defs and non-collapsed uses to dv.
+  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+      Kill(rx);
+      SetLiveReg(rx, dv);
+    }
+  }
+}
+
+// An instruction with no domain information: any tracked register it
+// explicitly defines is killed.
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+  const unsigned numDefs = mi->getDesc().getNumDefs();
+  for (unsigned op = 0; op != numDefs; ++op) {
+    MachineOperand &mo = mi->getOperand(op);
+    if (mo.isReg()) {
+      int rx = RegIndex(mo.getReg());
+      if (rx >= 0)
+        Kill(rx);
+    }
+  }
}
bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0; // No table yet; SetLiveReg() creates one on demand.
+ Distance = 0; // The instruction counter restarts for every function.
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*, 16> >
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- enterBasicBlock(*DFI);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
- MachineInstr *MI = I;
- const unsigned *equiv = 0;
- X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
- DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : " ") << *MI);
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance; // Counts only non-debug instructions; debug values were skipped above.
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ if (domp.first)
+ if (domp.second) // Non-zero second: handed to visitSoftInstr as the mask of usable domains.
+ visitSoftInstr(mi, domp.second);
+ else // Binds to 'if (domp.second)': only first is set, handed to visitHardInstr as the single domain.
+ visitHardInstr(mi, domp.first);
+ else if (LiveRegs) // Binds to 'if (domp.first)': no domain for this instruction; nothing to kill without a table.
+ visitGenericInstr(mi);
}
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0; // Any table now belongs to LiveOuts; the next block starts without one.
}
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ delete[] i->second; // Frees the per-block tables created with new[] in SetLiveReg().
+ LiveOuts.clear();
+ Avail.clear(); // The free list only holds pointers into Allocator, destroyed next.
+ Allocator.DestroyAll();
+
return false;
}
FunctionPass *llvm::createSSEDomainFixPass() {
return new SSEDomainFixPass();
}
-
-// These are the replaceable instructions. Some of these have _Int variants
-// that we don't include here. We don't want to replace instructions selected
-// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
- { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
- { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
- { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
- { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
- { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
- { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
- { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
- { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
- { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
- { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
- { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
- { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
- { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
- { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
- { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
- { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
- { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
- { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
-};
-
-void X86InstrInfo::populateSSEInstrDomainTable() {
- // Instructions that execute in the packed integer domain.
- static const unsigned PackedIntInstrs[] = {
- X86::LDDQUrm,
- X86::MASKMOVDQU,
- X86::MASKMOVDQU64,
- X86::MOVDI2PDIrm,
- X86::MOVDI2PDIrr,
- X86::MOVDQUmr_Int,
- X86::MOVDQUrm_Int,
- X86::MOVLQ128mr,
- X86::MOVNTDQArm,
- X86::MOVNTDQmr_Int,
- X86::MOVNTDQ_64mr,
- X86::MOVPDI2DImr,
- X86::MOVPDI2DIrr,
- X86::MOVPQI2QImr,
- X86::MOVPQIto64rr,
- X86::MOVQI2PQIrm,
- X86::MOVQxrxr,
- X86::MOVZDI2PDIrm,
- X86::MOVZDI2PDIrr,
- X86::MOVZPQILo2PQIrm,
- X86::MOVZPQILo2PQIrr,
- X86::MOVZQI2PQIrm,
- X86::MOVZQI2PQIrr,
- X86::MPSADBWrmi,
- X86::MPSADBWrri,
- X86::PABSBrm128,
- X86::PABSBrr128,
- X86::PABSDrm128,
- X86::PABSDrr128,
- X86::PABSWrm128,
- X86::PABSWrr128,
- X86::PACKSSDWrm,
- X86::PACKSSDWrr,
- X86::PACKSSWBrm,
- X86::PACKSSWBrr,
- X86::PACKUSDWrm,
- X86::PACKUSDWrr,
- X86::PACKUSWBrm,
- X86::PACKUSWBrr,
- X86::PADDBrm,
- X86::PADDBrr,
- X86::PADDDrm,
- X86::PADDDrr,
- X86::PADDQrm,
- X86::PADDQrr,
- X86::PADDSBrm,
- X86::PADDSBrr,
- X86::PADDSWrm,
- X86::PADDSWrr,
- X86::PADDUSBrm,
- X86::PADDUSBrr,
- X86::PADDUSWrm,
- X86::PADDUSWrr,
- X86::PADDWrm,
- X86::PADDWrr,
- X86::PALIGNR128rm,
- X86::PALIGNR128rr,
- X86::PAVGBrm,
- X86::PAVGBrr,
- X86::PAVGWrm,
- X86::PAVGWrr,
- X86::PBLENDVBrm0,
- X86::PBLENDVBrr0,
- X86::PBLENDWrmi,
- X86::PBLENDWrri,
- X86::PCMPEQBrm,
- X86::PCMPEQBrr,
- X86::PCMPEQDrm,
- X86::PCMPEQDrr,
- X86::PCMPEQQrm,
- X86::PCMPEQQrr,
- X86::PCMPEQWrm,
- X86::PCMPEQWrr,
- X86::PCMPESTRIArm,
- X86::PCMPESTRIArr,
- X86::PCMPESTRICrm,
- X86::PCMPESTRICrr,
- X86::PCMPESTRIOrm,
- X86::PCMPESTRIOrr,
- X86::PCMPESTRIrm,
- X86::PCMPESTRIrr,
- X86::PCMPESTRISrm,
- X86::PCMPESTRISrr,
- X86::PCMPESTRIZrm,
- X86::PCMPESTRIZrr,
- X86::PCMPESTRM128MEM,
- X86::PCMPESTRM128REG,
- X86::PCMPESTRM128rm,
- X86::PCMPESTRM128rr,
- X86::PCMPGTBrm,
- X86::PCMPGTBrr,
- X86::PCMPGTDrm,
- X86::PCMPGTDrr,
- X86::PCMPGTQrm,
- X86::PCMPGTQrr,
- X86::PCMPGTWrm,
- X86::PCMPGTWrr,
- X86::PCMPISTRIArm,
- X86::PCMPISTRIArr,
- X86::PCMPISTRICrm,
- X86::PCMPISTRICrr,
- X86::PCMPISTRIOrm,
- X86::PCMPISTRIOrr,
- X86::PCMPISTRIrm,
- X86::PCMPISTRIrr,
- X86::PCMPISTRISrm,
- X86::PCMPISTRISrr,
- X86::PCMPISTRIZrm,
- X86::PCMPISTRIZrr,
- X86::PCMPISTRM128MEM,
- X86::PCMPISTRM128REG,
- X86::PCMPISTRM128rm,
- X86::PCMPISTRM128rr,
- X86::PEXTRBmr,
- X86::PEXTRBrr,
- X86::PEXTRDmr,
- X86::PEXTRDrr,
- X86::PEXTRQmr,
- X86::PEXTRQrr,
- X86::PEXTRWmr,
- X86::PEXTRWri,
- X86::PHADDDrm128,
- X86::PHADDDrr128,
- X86::PHADDSWrm128,
- X86::PHADDSWrr128,
- X86::PHADDWrm128,
- X86::PHADDWrr128,
- X86::PHMINPOSUWrm128,
- X86::PHMINPOSUWrr128,
- X86::PHSUBDrm128,
- X86::PHSUBDrr128,
- X86::PHSUBSWrm128,
- X86::PHSUBSWrr128,
- X86::PHSUBWrm128,
- X86::PHSUBWrr128,
- X86::PINSRBrm,
- X86::PINSRBrr,
- X86::PINSRDrm,
- X86::PINSRDrr,
- X86::PINSRQrm,
- X86::PINSRQrr,
- X86::PINSRWrmi,
- X86::PINSRWrri,
- X86::PMADDUBSWrm128,
- X86::PMADDUBSWrr128,
- X86::PMADDWDrm,
- X86::PMADDWDrr,
- X86::PMAXSBrm,
- X86::PMAXSBrr,
- X86::PMAXSDrm,
- X86::PMAXSDrr,
- X86::PMAXSWrm,
- X86::PMAXSWrr,
- X86::PMAXUBrm,
- X86::PMAXUBrr,
- X86::PMAXUDrm,
- X86::PMAXUDrr,
- X86::PMAXUWrm,
- X86::PMAXUWrr,
- X86::PMINSBrm,
- X86::PMINSBrr,
- X86::PMINSDrm,
- X86::PMINSDrr,
- X86::PMINSWrm,
- X86::PMINSWrr,
- X86::PMINUBrm,
- X86::PMINUBrr,
- X86::PMINUDrm,
- X86::PMINUDrr,
- X86::PMINUWrm,
- X86::PMINUWrr,
- X86::PMOVSXBDrm,
- X86::PMOVSXBDrr,
- X86::PMOVSXBQrm,
- X86::PMOVSXBQrr,
- X86::PMOVSXBWrm,
- X86::PMOVSXBWrr,
- X86::PMOVSXDQrm,
- X86::PMOVSXDQrr,
- X86::PMOVSXWDrm,
- X86::PMOVSXWDrr,
- X86::PMOVSXWQrm,
- X86::PMOVSXWQrr,
- X86::PMOVZXBDrm,
- X86::PMOVZXBDrr,
- X86::PMOVZXBQrm,
- X86::PMOVZXBQrr,
- X86::PMOVZXBWrm,
- X86::PMOVZXBWrr,
- X86::PMOVZXDQrm,
- X86::PMOVZXDQrr,
- X86::PMOVZXWDrm,
- X86::PMOVZXWDrr,
- X86::PMOVZXWQrm,
- X86::PMOVZXWQrr,
- X86::PMULDQrm,
- X86::PMULDQrr,
- X86::PMULHRSWrm128,
- X86::PMULHRSWrr128,
- X86::PMULHUWrm,
- X86::PMULHUWrr,
- X86::PMULHWrm,
- X86::PMULHWrr,
- X86::PMULLDrm,
- X86::PMULLDrm_int,
- X86::PMULLDrr,
- X86::PMULLDrr_int,
- X86::PMULLWrm,
- X86::PMULLWrr,
- X86::PMULUDQrm,
- X86::PMULUDQrr,
- X86::PSADBWrm,
- X86::PSADBWrr,
- X86::PSHUFBrm128,
- X86::PSHUFBrr128,
- X86::PSHUFHWmi,
- X86::PSHUFHWri,
- X86::PSHUFLWmi,
- X86::PSHUFLWri,
- X86::PSIGNBrm128,
- X86::PSIGNBrr128,
- X86::PSIGNDrm128,
- X86::PSIGNDrr128,
- X86::PSIGNWrm128,
- X86::PSIGNWrr128,
- X86::PSLLDQri,
- X86::PSLLDri,
- X86::PSLLDrm,
- X86::PSLLDrr,
- X86::PSLLQri,
- X86::PSLLQrm,
- X86::PSLLQrr,
- X86::PSLLWri,
- X86::PSLLWrm,
- X86::PSLLWrr,
- X86::PSRADri,
- X86::PSRADrm,
- X86::PSRADrr,
- X86::PSRAWri,
- X86::PSRAWrm,
- X86::PSRAWrr,
- X86::PSRLDQri,
- X86::PSRLDri,
- X86::PSRLDrm,
- X86::PSRLDrr,
- X86::PSRLQri,
- X86::PSRLQrm,
- X86::PSRLQrr,
- X86::PSRLWri,
- X86::PSRLWrm,
- X86::PSRLWrr,
- X86::PSUBBrm,
- X86::PSUBBrr,
- X86::PSUBDrm,
- X86::PSUBDrr,
- X86::PSUBQrm,
- X86::PSUBQrr,
- X86::PSUBSBrm,
- X86::PSUBSBrr,
- X86::PSUBSWrm,
- X86::PSUBSWrr,
- X86::PSUBUSBrm,
- X86::PSUBUSBrr,
- X86::PSUBUSWrm,
- X86::PSUBUSWrr,
- X86::PSUBWrm,
- X86::PSUBWrr,
- X86::PUNPCKHBWrm,
- X86::PUNPCKHBWrr,
- X86::PUNPCKHWDrm,
- X86::PUNPCKHWDrr,
- X86::PUNPCKLBWrm,
- X86::PUNPCKLBWrr,
- X86::PUNPCKLWDrm,
- X86::PUNPCKLWDrr,
- };
-
- // Instructions that execute in the packed single domain.
- static const unsigned PackedSingleInstrs[] = {
- X86::ADDPSrm,
- X86::ADDPSrr,
- X86::ADDSUBPSrm,
- X86::ADDSUBPSrr,
- X86::BLENDPSrmi,
- X86::BLENDPSrri,
- X86::BLENDVPSrm0,
- X86::BLENDVPSrr0,
- X86::CMPPSrmi,
- X86::CMPPSrri,
- X86::DIVPSrm,
- X86::DIVPSrr,
- X86::DPPSrmi,
- X86::DPPSrri,
- X86::EXTRACTPSmr,
- X86::EXTRACTPSrr,
- X86::HADDPSrm,
- X86::HADDPSrr,
- X86::HSUBPSrm,
- X86::HSUBPSrr,
- X86::INSERTPSrm,
- X86::INSERTPSrr,
- X86::MAXPSrm,
- X86::MAXPSrm_Int,
- X86::MAXPSrr,
- X86::MAXPSrr_Int,
- X86::MINPSrm,
- X86::MINPSrm_Int,
- X86::MINPSrr,
- X86::MINPSrr_Int,
- X86::MOVHLPSrr,
- X86::MOVHPSmr,
- X86::MOVHPSrm,
- X86::MOVLHPSrr,
- X86::MOVLPSmr,
- X86::MOVLPSrm,
- X86::MOVMSKPSrr,
- X86::MOVNTPSmr_Int,
- X86::MOVSHDUPrm,
- X86::MOVSHDUPrr,
- X86::MOVSLDUPrm,
- X86::MOVSLDUPrr,
- X86::MOVUPSmr_Int,
- X86::MOVUPSrm_Int,
- X86::MULPSrm,
- X86::MULPSrr,
- X86::RCPPSm,
- X86::RCPPSm_Int,
- X86::RCPPSr,
- X86::RCPPSr_Int,
- X86::ROUNDPSm_Int,
- X86::ROUNDPSr_Int,
- X86::RSQRTPSm,
- X86::RSQRTPSm_Int,
- X86::RSQRTPSr,
- X86::RSQRTPSr_Int,
- X86::SQRTPSm,
- X86::SQRTPSm_Int,
- X86::SQRTPSr,
- X86::SQRTPSr_Int,
- X86::SUBPSrm,
- X86::SUBPSrr,
- };
-
- // Instructions that execute in the packed double domain.
- static const unsigned PackedDoubleInstrs[] = {
- X86::ADDPDrm,
- X86::ADDPDrr,
- X86::ADDSUBPDrm,
- X86::ADDSUBPDrr,
- X86::BLENDPDrmi,
- X86::BLENDPDrri,
- X86::BLENDVPDrm0,
- X86::BLENDVPDrr0,
- X86::CMPPDrmi,
- X86::CMPPDrri,
- X86::DIVPDrm,
- X86::DIVPDrr,
- X86::DPPDrmi,
- X86::DPPDrri,
- X86::HADDPDrm,
- X86::HADDPDrr,
- X86::HSUBPDrm,
- X86::HSUBPDrr,
- X86::MAXPDrm,
- X86::MAXPDrm_Int,
- X86::MAXPDrr,
- X86::MAXPDrr_Int,
- X86::MINPDrm,
- X86::MINPDrm_Int,
- X86::MINPDrr,
- X86::MINPDrr_Int,
- X86::MOVHPDmr,
- X86::MOVHPDrm,
- X86::MOVLPDmr,
- X86::MOVLPDrm,
- X86::MOVMSKPDrr,
- X86::MOVNTPDmr_Int,
- X86::MOVUPDmr_Int,
- X86::MOVUPDrm_Int,
- X86::MULPDrm,
- X86::MULPDrr,
- X86::ROUNDPDm_Int,
- X86::ROUNDPDr_Int,
- X86::SQRTPDm,
- X86::SQRTPDm_Int,
- X86::SQRTPDr,
- X86::SQRTPDr_Int,
- X86::SUBPDrm,
- X86::SUBPDrr,
- };
-
- // Add non-negative entries for forcing instructions.
- for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i)
- SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i],
- PackedInt));
- for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i)
- SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i],
- PackedSingle));
- for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i)
- SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i],
- PackedDouble));
-
- // Add row number + 1 for replaceable instructions.
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
- for (unsigned c = 0; c != 3; ++c)
- SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c],
- c + 4*(i+1)));
-}
-
-X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
- const unsigned *&equiv) const {
- DenseMap<unsigned,unsigned>::const_iterator i =
- SSEInstrDomainTable.find(MI->getOpcode());
- if (i == SSEInstrDomainTable.end())
- return NotSSEDomain;
- unsigned value = i->second;
- if (value/4)
- equiv = ReplaceableInstrs[value/4 - 1];
- else
- equiv = 0;
- return SSEDomain(value & 3);
-}