#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
-
#include <limits>
using namespace llvm;
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
TM(tm), RI(tm, *this) {
- SmallVector<unsigned,16> AmbEntries;
+ enum {
+ TB_NOT_REVERSABLE = 1U << 31,
+ TB_FLAGS = TB_NOT_REVERSABLE
+ };
+
static const unsigned OpTbl2Addr[][2] = {
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
{ X86::ADC64rr, X86::ADC64mr },
{ X86::ADD16ri, X86::ADD16mi },
{ X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
{ X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
{ X86::ADD32ri, X86::ADD32mi },
{ X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
{ X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
{ X86::ADD64ri32, X86::ADD64mi32 },
{ X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
{ X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
{ X86::ADD8ri, X86::ADD8mi },
{ X86::ADD8rr, X86::ADD8mr },
{ X86::AND16ri, X86::AND16mi },
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,0))).second)
- assert(false && "Duplicated entries?");
+ unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+ assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load and store, no alignment requirement.
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp,
- AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
// If the third value is 1, then it's folding either a load or a store.
{ X86::BT64ri8, X86::BT64mi8, 1, 0 },
{ X86::CALL32r, X86::CALL32m, 1, 0 },
{ X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
{ X86::CMP16ri, X86::CMP16mi, 1, 0 },
{ X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
{ X86::CMP16rr, X86::CMP16mr, 1, 0 },
{ X86::DIV64r, X86::DIV64m, 1, 0 },
{ X86::DIV8r, X86::DIV8m, 1, 0 },
{ X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
- { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
{ X86::IDIV16r, X86::IDIV16m, 1, 0 },
{ X86::IDIV32r, X86::IDIV32m, 1, 0 },
{ X86::IDIV64r, X86::IDIV64m, 1, 0 },
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
- { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
- { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
- { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
{ X86::SETPr, X86::SETPm, 0, 0 },
{ X86::SETSr, X86::SETSm, 0, 0 },
{ X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+ { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
{ X86::TEST16ri, X86::TEST16mi, 1, 0 },
{ X86::TEST32ri, X86::TEST32mi, 1, 0 },
{ X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Align = OpTbl0[i][3];
- if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
unsigned FoldedLoad = OpTbl0[i][2];
+ unsigned Align = OpTbl0[i][3];
+ assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load or store.
unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl1[][3] = {
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
{ X86::IMUL16rri, X86::IMUL16rmi, 0 },
{ X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
{ X86::IMUL32rri, X86::IMUL32rmi, 0 },
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
{ X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
- { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
- { X86::MOVSDrr, X86::MOVSDrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
- { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
- { X86::MOVSSrr, X86::MOVSSrm, 0 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
+ unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl1[i][2];
- if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+ RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 1, folded load
unsigned AuxInfo = 1 | (1 << 4);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl2[][3] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD8rr, X86::ADD8rm, 0 },
{ X86::ADDPDrr, X86::ADDPDrm, 16 },
{ X86::ADDPSrr, X86::ADDPSrm, 16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, 16 },
{ X86::PMULHWrr, X86::PMULHWrm, 16 },
{ X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
{ X86::PMULLWrr, X86::PMULLWrm, 16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, 16 },
{ X86::PORrr, X86::PORrm, 16 },
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
+ unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl2[i][2];
- if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+
+ assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+ RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+ // If this is not a reversable operation (because there is a many->one)
+ // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 2, folded load
unsigned AuxInfo = 2 | (1 << 4);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
- }
-
- // Remove ambiguous entries.
- assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
-}
-
-bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case X86::MOV8rr:
- case X86::MOV8rr_NOREX:
- case X86::MOV16rr:
- case X86::MOV32rr:
- case X86::MOV64rr:
- case X86::MOVSSrr:
- case X86::MOVSDrr:
-
- // FP Stack register class copies
- case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
- case X86::MOV_Fp3264: case X86::MOV_Fp3280:
- case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
- case X86::FsMOVAPSrr:
- case X86::FsMOVAPDrr:
- case X86::MOVAPSrr:
- case X86::MOVAPDrr:
- case X86::MOVDQArr:
- case X86::MOVSS2PSrr:
- case X86::MOVSD2PDrr:
- case X86::MOVPS2SSrr:
- case X86::MOVPD2SDrr:
- case X86::MMX_MOVQ64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
}
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
- SubIdx = 1;
+ SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
- SubIdx = 3;
+ SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
- SubIdx = 4;
+ SubIdx = X86::sub_32bit;
break;
}
return true;
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 1, FrameIndex))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI->getOperand(0).getReg();
return 0;
}
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 0, FrameIndex))
- return MI->getOperand(X86AddrNumOperands).getReg();
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
+ return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
// It's always safe to clobber EFLAGS at the end of a block.
- if (I == MBB.end())
+ if (I == E)
return true;
// For compile time consideration, if we are not able to determine the
// This instruction defines EFLAGS, no need to look any further.
return true;
++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (Iter == MBB.end())
+ if (Iter == E)
return true;
}
+ MachineBasicBlock::iterator B = MBB.begin();
Iter = I;
for (unsigned i = 0; i < 4; ++i) {
// If we make it to the beginning of the block, it's safe to clobber
// EFLAGS iff EFLAGS is not live-in.
- if (Iter == MBB.begin())
+ if (Iter == B)
return !MBB.isLiveIn(X86::EFLAGS);
--Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(I);
-
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
+ const TargetRegisterInfo &TRI) const {
+ DebugLoc DL = Orig->getDebugLoc();
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
}
Clone = false;
}
if (Clone) {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
} else {
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
}
MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
? X86::LEA64_32r : X86::LEA32r;
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// least on modern x86 machines).
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0);
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
case X86::INC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, leaInReg, true, 1);
break;
case X86::DEC16r:
case X86::DEC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, leaInReg, true, -1);
break;
case X86::ADD16ri:
case X86::ADD16ri8:
- addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
unsigned leaInReg2 = 0;
// just a single insert_subreg.
addRegReg(MIB, leaInReg, true, leaInReg, false);
} else {
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// well be shifting and then extracting the lower 16-bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
- .addReg(leaInReg2)
- .addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::SUBREG_16BIT);
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
if (LV) {
// Update live variables
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
case X86::SHL32ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill)).addImm(0);
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
break;
}
case X86::SHL16ri: {
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
default: {
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, 1);
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, -1);
Src, isKill, -1);
break;
case X86::ADD64rr:
- case X86::ADD32rr: {
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
}
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
}
case X86::ADD64ri32:
case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8: {
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
}
case X86::ADD16ri:
case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
- case X86::JE: return X86::COND_E;
- case X86::JNE: return X86::COND_NE;
- case X86::JL: return X86::COND_L;
- case X86::JLE: return X86::COND_LE;
- case X86::JG: return X86::COND_G;
- case X86::JGE: return X86::COND_GE;
- case X86::JB: return X86::COND_B;
- case X86::JBE: return X86::COND_BE;
- case X86::JA: return X86::COND_A;
- case X86::JAE: return X86::COND_AE;
- case X86::JS: return X86::COND_S;
- case X86::JNS: return X86::COND_NS;
- case X86::JP: return X86::COND_P;
- case X86::JNP: return X86::COND_NP;
- case X86::JO: return X86::COND_O;
- case X86::JNO: return X86::COND_NO;
+ case X86::JE_4: return X86::COND_E;
+ case X86::JNE_4: return X86::COND_NE;
+ case X86::JL_4: return X86::COND_L;
+ case X86::JLE_4: return X86::COND_LE;
+ case X86::JG_4: return X86::COND_G;
+ case X86::JGE_4: return X86::COND_GE;
+ case X86::JB_4: return X86::COND_B;
+ case X86::JBE_4: return X86::COND_BE;
+ case X86::JA_4: return X86::COND_A;
+ case X86::JAE_4: return X86::COND_AE;
+ case X86::JS_4: return X86::COND_S;
+ case X86::JNS_4: return X86::COND_NS;
+ case X86::JP_4: return X86::COND_P;
+ case X86::JNP_4: return X86::COND_NP;
+ case X86::JO_4: return X86::COND_O;
+ case X86::JNO_4: return X86::COND_NO;
}
}
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Illegal condition code!");
- case X86::COND_E: return X86::JE;
- case X86::COND_NE: return X86::JNE;
- case X86::COND_L: return X86::JL;
- case X86::COND_LE: return X86::JLE;
- case X86::COND_G: return X86::JG;
- case X86::COND_GE: return X86::JGE;
- case X86::COND_B: return X86::JB;
- case X86::COND_BE: return X86::JBE;
- case X86::COND_A: return X86::JA;
- case X86::COND_AE: return X86::JAE;
- case X86::COND_S: return X86::JS;
- case X86::COND_NS: return X86::JNS;
- case X86::COND_P: return X86::JP;
- case X86::COND_NP: return X86::JNP;
- case X86::COND_O: return X86::JO;
- case X86::COND_NO: return X86::JNO;
+ case X86::COND_E: return X86::JE_4;
+ case X86::COND_NE: return X86::JNE_4;
+ case X86::COND_L: return X86::JL_4;
+ case X86::COND_LE: return X86::JLE_4;
+ case X86::COND_G: return X86::JG_4;
+ case X86::COND_GE: return X86::JGE_4;
+ case X86::COND_B: return X86::JB_4;
+ case X86::COND_BE: return X86::JBE_4;
+ case X86::COND_A: return X86::JA_4;
+ case X86::COND_AE: return X86::JAE_4;
+ case X86::COND_S: return X86::JS_4;
+ case X86::COND_NS: return X86::JNS_4;
+ case X86::COND_P: return X86::JP_4;
+ case X86::COND_NP: return X86::JNP_4;
+ case X86::COND_O: return X86::JO_4;
+ case X86::COND_NO: return X86::JNO_4;
}
}
return !isPredicated(MI);
}
-// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
-static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
- const X86InstrInfo &TII) {
- if (MI->getOpcode() == X86::FP_REG_KILL)
- return false;
- return TII.isUnpredicatedTerminator(MI);
-}
-
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
- if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ if (!isUnpredicatedTerminator(I))
break;
// A terminator that isn't a branch can't easily be handled by this
return true;
// Handle unconditional branches.
- if (I->getOpcode() == X86::JMP) {
+ if (I->getOpcode() == X86::JMP_4) {
+ UnCondBrIter = I;
+
if (!AllowModify) {
TBB = I->getOperand(0).getMBB();
continue;
TBB = 0;
I->eraseFromParent();
I = MBB.end();
+ UnCondBrIter = MBB.end();
continue;
}
- // TBB is used to indicate the unconditinal destination.
+ // TBB is used to indicate the unconditional destination.
TBB = I->getOperand(0).getMBB();
continue;
}
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+ // If we can modify the code and it ends in something like:
+ //
+ // jCC L1
+ // jmp L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Then we can change this to:
+ //
+ // jnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ // Which is a bit more efficient.
+ // We conditionally jump to the fall-through block.
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ unsigned JNCC = GetCondBranchFromCond(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ // Restart the analysis.
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+
FBB = TBB;
TBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(BranchCode));
while (I != MBB.begin()) {
--I;
- if (I->getOpcode() != X86::JMP &&
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() != X86::JMP_4 &&
GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
// Remove the branch.
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc dl = DebugLoc::getUnknownLoc();
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
return 1;
}
switch (CC) {
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
++Count;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
++Count;
break;
default: {
unsigned Opc = GetCondBranchFromCond(CC);
- BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
++Count;
}
}
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
++Count;
}
return Count;
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
-bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- // Determine if DstRC and SrcRC have a common superclass in common.
- const TargetRegisterClass *CommonRC = DestRC;
- if (DestRC == SrcRC)
- /* Source and destination have the same register class. */;
- else if (CommonRC->hasSuperClass(SrcRC))
- CommonRC = SrcRC;
- else if (!DestRC->hasSubClass(SrcRC)) {
- // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
- // but we want to copy then as GR64. Similarly, for GR32_NOREX and
- // GR32_NOSP, copy as GR32.
- if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
- DestRC->hasSuperClass(&X86::GR64RegClass))
- CommonRC = &X86::GR64RegClass;
- else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
- DestRC->hasSuperClass(&X86::GR32RegClass))
- CommonRC = &X86::GR32RegClass;
- else
- CommonRC = 0;
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
}
- if (CommonRC) {
- unsigned Opc;
- if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32RegClass ||
- CommonRC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16RegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_NOREXRegClass ||
- CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_NOREXRegClass) {
+ return 0;
+}
+
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::RFP32RegClass) {
- Opc = X86::MOV_Fp3232;
- } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
- Opc = X86::MOV_Fp6464;
- } else if (CommonRC == &X86::RFP80RegClass) {
- Opc = X86::MOV_Fp8080;
- } else if (CommonRC == &X86::FR32RegClass) {
- Opc = X86::FsMOVAPSrr;
- } else if (CommonRC == &X86::FR64RegClass) {
- Opc = X86::FsMOVAPDrr;
- } else if (CommonRC == &X86::VR128RegClass) {
- Opc = X86::MOVAPSrr;
- } else if (CommonRC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rr;
- } else {
- return false;
- }
- BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
- return true;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOVAPSrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
// Moving EFLAGS to / from another register requires a push and a pop.
- if (SrcRC == &X86::CCRRegClass) {
- if (SrcReg != X86::EFLAGS)
- return false;
- if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
- return true;
- } else if (DestRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
- return true;
- }
- } else if (DestRC == &X86::CCRRegClass) {
- if (DestReg != X86::EFLAGS)
- return false;
- if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFQ));
- return true;
- } else if (SrcRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFD));
- return true;
+ return;
}
}
-
- // Moving from ST(0) turns into FpGET_ST0_32 etc.
- if (SrcRC == &X86::RSTRegClass) {
- // Copying from ST(0)/ST(1).
- if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
- // Can only copy from ST(0)/ST(1) right now
- return false;
- bool isST0 = SrcReg == X86::ST0;
- unsigned Opc;
- if (DestRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
- else if (DestRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
- else {
- if (DestRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
+ return;
}
- BuildMI(MBB, MI, DL, get(Opc), DestReg);
- return true;
}
- // Moving to ST(0) turns into FpSET_ST0_32 etc.
- if (DestRC == &X86::RSTRegClass) {
- // Copying to ST(0) / ST(1).
- if (DestReg != X86::ST0 && DestReg != X86::ST1)
- // Can only copy to TOS right now
- return false;
- bool isST0 = DestReg == X86::ST0;
- unsigned Opc;
- if (SrcRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
- else if (SrcRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
- else {
- if (SrcRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
- }
- BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
- return true;
- }
-
- // Not yet supported!
- return false;
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
}
-static unsigned getStoreRegOpcode(unsigned SrcReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8RegClass) {
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ switch (RC->getID()) {
+ default:
+ llvm_unreachable("Unknown regclass");
+ case X86::GR64RegClassID:
+ case X86::GR64_ABCDRegClassID:
+ case X86::GR64_NOREXRegClassID:
+ case X86::GR64_NOREX_NOSPRegClassID:
+ case X86::GR64_NOSPRegClassID:
+ case X86::GR64_TCRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32RegClassID:
+ case X86::GR32_ABCDRegClassID:
+ case X86::GR32_ADRegClassID:
+ case X86::GR32_NOREXRegClassID:
+ case X86::GR32_NOSPRegClassID:
+ case X86::GR32_TCRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16RegClassID:
+ case X86::GR16_ABCDRegClassID:
+ case X86::GR16_NOREXRegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8RegClassID:
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
- if (isHReg(SrcReg) &&
+ if (isHReg(Reg) &&
TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR8_ABCD_LRegClassID:
+ case X86::GR8_NOREXRegClassID:
+ return load ? X86::MOV8rm :X86::MOV8mr;
+ case X86::GR8_ABCD_HRegClassID:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::ST_FpP80m; // pops
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::ST_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::ST_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSmr;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDmr;
- } else if (RC == &X86::VR128RegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::RFP80RegClassID:
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case X86::RFP64RegClassID:
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ case X86::RFP32RegClassID:
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ case X86::FR32RegClassID:
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ case X86::FR64RegClassID:
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ case X86::VR128RegClassID:
// If stack is realigned we can use aligned stores.
- Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64mr;
- } else {
- llvm_unreachable("Unknown regclass");
+ if (isStackAligned)
+ return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ else
+ return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case X86::VR64RegClassID:
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
}
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
+
- return Opc;
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
}
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
- DebugLoc DL = DebugLoc::getUnknownLoc();
+ DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
NewMIs.push_back(MIB);
}
-static unsigned getLoadRegOpcode(unsigned DestReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- const TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if (isHReg(DestReg) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::LD_Fp80m;
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::LD_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::LD_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSrm;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDrm;
- } else if (RC == &X86::VR128RegClass) {
- // If stack is realigned we can use aligned loads.
- Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rm;
- } else {
- llvm_unreachable("Unknown regclass");
- }
-
- return Opc;
-}
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
- DebugLoc DL = DebugLoc::getUnknownLoc();
+ DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.addOperand(Addr[i]);
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
if (Reg == FPReg)
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, &RI);
}
}
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
if (Reg == FPReg)
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, &RI);
}
}
return true;
}
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = FrameIx;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+ addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
const SmallVectorImpl<MachineOperand> &MOs,
MachineInstr *MI,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
unsigned MinAlign = I->second.second;
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- 4/*x86_subreg_32bit*/));
+ X86::sub_32bit));
else
- NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
return NewMI;
}
}
// No fusion
- if (PrintFailedFusing)
+ if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
}
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
Alignment = 16;
break;
case X86::FsFLD0SD:
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
}
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
} else if (Ops.size() != 1)
return NULL;
- SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
+ // Medium and large mode can't fold loads this way.
+ if (TM.getCodeModel() != CodeModel::Small &&
+ TM.getCodeModel() != CodeModel::Kernel)
+ return NULL;
+
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
if (TM.getRelocationModel() == Reloc::PIC_) {
if (TM.getSubtarget<X86Subtarget>().is64Bit())
PICBase = X86::RIP;
else
- // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
// This doesn't work for several reasons.
// 1. GlobalBaseReg may have been spilled.
// 2. It may not be live at MI.
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
- if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+ const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
default: {
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
- for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
break;
}
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
- case X86::MOV64r0:
- return true;
+ case X86::MOV64r0: return true;
default: break;
}
OpcodeTablePtr = &RegOp2MemOpTable0;
OpcodeTablePtr = &RegOp2MemOpTable2;
}
- if (OpcodeTablePtr) {
- // Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)Opc);
- if (I != OpcodeTablePtr->end())
- return true;
- }
- return false;
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
- SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
SmallVector<MachineOperand,4> ImpOps;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
- if (i >= Index && i < Index + X86AddrNumOperands)
+ if (i >= Index && i < Index + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (Op.isReg() && Op.isImplicit())
ImpOps.push_back(Op);
loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
- for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
MachineOperand &MO = NewMIs[0]->getOperand(i);
if (MO.isReg())
MO.setIsKill(false);
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
+ case X86::CMP64ri8:
case X86::CMP32ri:
+ case X86::CMP32ri8:
case X86::CMP16ri:
+ case X86::CMP16ri8:
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
switch (DataMI->getOpcode()) {
default: break;
+ case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
unsigned NumOps = N->getNumOperands();
for (unsigned i = 0; i != NumOps-1; ++i) {
SDValue Op = N->getOperand(i);
- if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (i < Index-NumDefs)
BeforeOps.push_back(Op);
MachineInstr::mmo_iterator> MMOs =
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)Opc);
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
if (I == MemOp2RegOpTable.end())
return 0;
bool FoldedLoad = I->second.second & (1 << 4);
Load1->getOperand(2) == Load2->getOperand(2)) {
if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
return false;
- SDValue Op2 = Load1->getOperand(2);
- if (!isa<RegisterSDNode>(Op2) ||
- cast<RegisterSDNode>(Op2)->getReg() != 0)
- return 0;
// Now let's examine the displacements.
if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
EVT VT = Load1->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: {
+ default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
if (TM.getSubtargetImpl()->is64Bit()) {
if (NumLoads >= 3)
return false;
- } else if (NumLoads)
+ } else if (NumLoads) {
return false;
+ }
break;
- }
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::f64:
if (NumLoads)
return false;
+ break;
}
return true;
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
return true;
}
return false;
}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const TargetInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg:
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= (1 << 0) | (1 << 2);
- break;
- case X86II::MRMSrcReg: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
-
-/// sizePCRelativeBlockAddress - This method returns the size of a PC
-/// relative block address instruction
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// the global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
///
-static unsigned sizePCRelativeBlockAddress() {
- return 4;
-}
-
-/// sizeGlobalAddress - Give the size of the emission of this global address
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
-static unsigned sizeGlobalAddress(bool dword) {
- return dword ? 8 : 4;
-}
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
-/// sizeConstPoolAddress - Give the size of the emission of this constant
-/// pool address
-///
-static unsigned sizeConstPoolAddress(bool dword) {
- return dword ? 8 : 4;
-}
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
-/// sizeExternalSymbolAddress - Give the size of the emission of this external
-/// symbol
-///
-static unsigned sizeExternalSymbolAddress(bool dword) {
- return dword ? 8 : 4;
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
}
-/// sizeJumpTableAddress - Give the size of the emission of this jump
-/// table address
-///
-static unsigned sizeJumpTableAddress(bool dword) {
- return dword ? 8 : 4;
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
}
-static unsigned sizeConstant(unsigned Size) {
- return Size;
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
}
-static unsigned sizeRegModRMByte(){
- return 1;
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
}
-static unsigned sizeSIBByte(){
- return 1;
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(X86::NOOP);
}
-static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
- unsigned FinalSize = 0;
- // If this is a simple integer displacement that doesn't require a relocation.
- if (!RelocOp) {
- FinalSize += sizeConstant(4);
- return FinalSize;
- }
-
- // Otherwise, this is something that requires a relocation.
- if (RelocOp->isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (RelocOp->isCPI()) {
- FinalSize += sizeConstPoolAddress(false);
- } else if (RelocOp->isJTI()) {
- FinalSize += sizeJumpTableAddress(false);
- } else {
- llvm_unreachable("Unknown value to relocate!");
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ switch (DefMI->getOpcode()) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
}
- return FinalSize;
}
-static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
- bool IsPIC, bool Is64BitMode) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = 0;
- unsigned FinalSize = 0;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else {
- DispVal = 1;
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(ID) {}
- // Is a SIB byte needed?
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0) { // Just a displacement?
- // Emit special case [disp32] encoding
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- } else {
- unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit simple indirect register encoding... [EAX] f.e.
- ++FinalSize;
- // Be pessimistic and assume it's a disp32, not a disp8
- } else {
- // Emit the most general non-SIB encoding: [REG+disp32]
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
- } else { // We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
- bool ForceDisp32 = false;
- if (BaseReg == 0 || DispForReloc) {
- // Emit the normal disp32 encoding.
- ++FinalSize;
- ForceDisp32 = true;
- } else {
- ++FinalSize;
- }
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
- FinalSize += sizeSIBByte();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
- // Do we need to output a displacement?
- if (DispVal != 0 || ForceDisp32) {
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
- return FinalSize;
-}
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
-static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
- const TargetInstrDesc *Desc,
- bool IsPIC, bool Is64BitMode) {
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = GlobalBaseReg;
- unsigned Opcode = Desc->Opcode;
- unsigned FinalSize = 0;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
-
- // Emit segment override opcode prefix as needed.
- switch (Desc->TSFlags & X86II::SegOvrMask) {
- case X86II::FS:
- case X86II::GS:
- ++FinalSize;
- break;
- default: llvm_unreachable("Invalid segment!");
- case 0: break; // No segment override!
- }
-
- // Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
-
- // Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
-
- // Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
-
- bool Need0FPrefix = false;
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TB: // Two-byte opcode prefix
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- Need0FPrefix = true;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::REP: break; // already handled.
- case X86II::XS: // F3 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::XD: // F2 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
- case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
- ++FinalSize;
- break; // Two-byte opcode prefix
- default: llvm_unreachable("Invalid prefix!");
- case 0: break; // No prefix!
- }
-
- if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
- ++FinalSize;
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- if (Need0FPrefix)
- ++FinalSize;
-
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
- ++FinalSize;
- break;
- case X86II::TA: // 0F 3A
- ++FinalSize;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- break;
- }
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
- CurOp++;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
-
- switch (Desc->TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset, this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
- FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- break;
- }
- case TargetOpcode::DBG_LABEL:
- case TargetOpcode::EH_LABEL:
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
- break;
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- ++FinalSize;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm:
- ++FinalSize;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- if (MO.isMBB()) {
- FinalSize += sizePCRelativeBlockAddress();
- } else if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (MO.isSymbol()) {
- FinalSize += sizeExternalSymbolAddress(false);
- } else if (MO.isImm()) {
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- } else {
- llvm_unreachable("Unknown RawFrm operand!");
- }
- }
- break;
-
- case X86II::AddRegFrm:
- ++FinalSize;
- ++CurOp;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
+ // Operand of MovePCtoStack is completely ignored by asm printer. It's
+ // only used in JIT code emission as displacement to pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
}
- }
- break;
-
- case X86II::MRMDestReg: {
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
- case X86II::MRMDestMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRMSrcReg:
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
-
- case X86II::MRMSrcMem: {
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
-
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += AddrOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- ++FinalSize;
- if (Desc->getOpcode() == X86::LFENCE ||
- Desc->getOpcode() == X86::MFENCE) {
- // Special handling of lfence and mfence;
- FinalSize += sizeRegModRMByte();
- } else if (Desc->getOpcode() == X86::MONITOR ||
- Desc->getOpcode() == X86::MWAIT) {
- // Special handling of monitor and mwait.
- FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
- } else {
- ++CurOp;
- FinalSize += sizeRegModRMByte();
+ return true;
}
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri32)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
}
- break;
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
-
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64mi32)
- dword = true;
- if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
- break;
- }
-
- case X86II::MRMInitReg:
- ++FinalSize;
- // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
- FinalSize += sizeRegModRMByte();
- ++CurOp;
- break;
- }
-
- if (!Desc->isVariadic() && CurOp != NumOps) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Cannot determine size: " << MI;
- llvm_report_error(Msg.str());
- }
-
-
- return FinalSize;
-}
-
-
-unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
- unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
- if (Desc.getOpcode() == X86::MOVPC32r)
- Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
- return Size;
+ };
}
-/// getGlobalBaseReg - Return a virtual register initialized with the
-/// the global base register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
- "X86-64 PIC uses RIP relative addressing");
-
- X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
- unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
- return GlobalBaseReg;
-
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-
- const TargetInstrInfo *TII = TM.getInstrInfo();
- // Operand of MovePCtoStack is completely ignored by asm printer. It's
- // only used in JIT code emission as displacement to pc.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
- // If we're using vanilla 'GOT' PIC style, we should use relative addressing
- // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
- // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
- X86II::MO_GOT_ABSOLUTE_ADDRESS);
- } else {
- GlobalBaseReg = PC;
- }
-
- X86FI->setGlobalBaseReg(GlobalBaseReg);
- return GlobalBaseReg;
-}
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }