diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a43c5ddad3f..1a7f06cbe3a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -18,6 +18,9 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,25 +28,24 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" - +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; -namespace { - cl::opt<bool> - NoFusing("disable-spill-fusing", - cl::desc("Disable fusing of spill code into instructions")); - cl::opt<bool> - PrintFailedFusing("print-failed-fuse-candidates", - cl::desc("Print instructions that the allocator wants to" - " fuse, but the X86 backend currently can't"), - cl::Hidden); - cl::opt<bool> - ReMatPICStubLoad("remat-pic-stub-load", - cl::desc("Re-materialize load from stub in PIC mode"), - cl::init(false), cl::Hidden); -} +static cl::opt<bool> +NoFusing("disable-spill-fusing", + cl::desc("Disable fusing of spill code into instructions")); +static cl::opt<bool> +PrintFailedFusing("print-failed-fuse-candidates", + cl::desc("Print instructions that the allocator wants to" + " fuse, but the X86 backend currently can't"), + cl::Hidden); +static cl::opt<bool> +ReMatPICStubLoad("remat-pic-stub-load", + cl::desc("Re-materialize load from stub in PIC mode"), + cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), @@ -211,9 +213,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) unsigned RegOp = OpTbl2Addr[i][0]; unsigned MemOp = OpTbl2Addr[i][1]; if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,0))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store + // Index 0, folded load and store, no alignment requirement. + unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) @@ -221,92 +224,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) } // If the third value is 1, then it's folding either a load or a store.
- static const unsigned OpTbl0[][3] = { - { X86::CALL32r, X86::CALL32m, 1 }, - { X86::CALL64r, X86::CALL64m, 1 }, - { X86::CMP16ri, X86::CMP16mi, 1 }, - { X86::CMP16ri8, X86::CMP16mi8, 1 }, - { X86::CMP16rr, X86::CMP16mr, 1 }, - { X86::CMP32ri, X86::CMP32mi, 1 }, - { X86::CMP32ri8, X86::CMP32mi8, 1 }, - { X86::CMP32rr, X86::CMP32mr, 1 }, - { X86::CMP64ri32, X86::CMP64mi32, 1 }, - { X86::CMP64ri8, X86::CMP64mi8, 1 }, - { X86::CMP64rr, X86::CMP64mr, 1 }, - { X86::CMP8ri, X86::CMP8mi, 1 }, - { X86::CMP8rr, X86::CMP8mr, 1 }, - { X86::DIV16r, X86::DIV16m, 1 }, - { X86::DIV32r, X86::DIV32m, 1 }, - { X86::DIV64r, X86::DIV64m, 1 }, - { X86::DIV8r, X86::DIV8m, 1 }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1 }, - { X86::IDIV32r, X86::IDIV32m, 1 }, - { X86::IDIV64r, X86::IDIV64m, 1 }, - { X86::IDIV8r, X86::IDIV8m, 1 }, - { X86::IMUL16r, X86::IMUL16m, 1 }, - { X86::IMUL32r, X86::IMUL32m, 1 }, - { X86::IMUL64r, X86::IMUL64m, 1 }, - { X86::IMUL8r, X86::IMUL8m, 1 }, - { X86::JMP32r, X86::JMP32m, 1 }, - { X86::JMP64r, X86::JMP64m, 1 }, - { X86::MOV16ri, X86::MOV16mi, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0 }, - { X86::MOV16to16_, X86::MOV16_mr, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0 }, - { X86::MOV32to32_, X86::MOV32_mr, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0 }, - { X86::MUL16r, X86::MUL16m, 1 }, - { X86::MUL32r, X86::MUL32m, 1 }, - { X86::MUL64r, X86::MUL64m, 1 }, - { X86::MUL8r, X86::MUL8m, 1 }, - { X86::SETAEr, X86::SETAEm, 0 }, - { X86::SETAr, X86::SETAm, 0 }, - { X86::SETBEr, X86::SETBEm, 0 }, - { X86::SETBr, X86::SETBm, 0 }, - { X86::SETCr, X86::SETCm, 0 }, - { X86::SETEr, X86::SETEm, 0 }, - { X86::SETGEr, X86::SETGEm, 0 }, - { X86::SETGr, X86::SETGm, 0 }, - { X86::SETLEr, X86::SETLEm, 0 }, - { X86::SETLr, X86::SETLm, 0 }, - { X86::SETNCr, X86::SETNCm, 0 }, - { X86::SETNEr, X86::SETNEm, 0 }, - { X86::SETNOr, X86::SETNOm, 0 }, - { X86::SETNPr, X86::SETNPm, 0 }, - { X86::SETNSr, X86::SETNSm, 0 }, - { X86::SETOr, X86::SETOm, 0 }, - { X86::SETPr, X86::SETPm, 0 }, - { X86::SETSr, X86::SETSm, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1 }, - { X86::TEST16ri, X86::TEST16mi, 1 }, - { X86::TEST32ri, X86::TEST32mi, 1 }, - { X86::TEST64ri32, X86::TEST64mi32, 1 }, - { X86::TEST8ri, X86::TEST8mi, 1 } + static const unsigned OpTbl0[][4] = { + { X86::BT16ri8, X86::BT16mi8, 1, 0 }, + { X86::BT32ri8, X86::BT32mi8, 1, 0 }, + { X86::BT64ri8, X86::BT64mi8, 1, 0 }, + { X86::CALL32r, X86::CALL32m, 1, 0 }, + { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::CMP16ri, X86::CMP16mi, 1, 0 }, + { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, + { X86::CMP16rr, X86::CMP16mr, 1, 0 }, + { X86::CMP32ri, X86::CMP32mi, 1, 0 }, + { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, + { X86::CMP32rr, X86::CMP32mr, 1, 0 }, + { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, + { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, + { X86::CMP64rr, X86::CMP64mr, 1, 0 }, + { X86::CMP8ri, 
X86::CMP8mi, 1, 0 }, + { X86::CMP8rr, X86::CMP8mr, 1, 0 }, + { X86::DIV16r, X86::DIV16m, 1, 0 }, + { X86::DIV32r, X86::DIV32m, 1, 0 }, + { X86::DIV64r, X86::DIV64m, 1, 0 }, + { X86::DIV8r, X86::DIV8m, 1, 0 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, + { X86::IDIV16r, X86::IDIV16m, 1, 0 }, + { X86::IDIV32r, X86::IDIV32m, 1, 0 }, + { X86::IDIV64r, X86::IDIV64m, 1, 0 }, + { X86::IDIV8r, X86::IDIV8m, 1, 0 }, + { X86::IMUL16r, X86::IMUL16m, 1, 0 }, + { X86::IMUL32r, X86::IMUL32m, 1, 0 }, + { X86::IMUL64r, X86::IMUL64m, 1, 0 }, + { X86::IMUL8r, X86::IMUL8m, 1, 0 }, + { X86::JMP32r, X86::JMP32m, 1, 0 }, + { X86::JMP64r, X86::JMP64m, 1, 0 }, + { X86::MOV16ri, X86::MOV16mi, 0, 0 }, + { X86::MOV16rr, X86::MOV16mr, 0, 0 }, + { X86::MOV32ri, X86::MOV32mi, 0, 0 }, + { X86::MOV32rr, X86::MOV32mr, 0, 0 }, + { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, + { X86::MOV64rr, X86::MOV64mr, 0, 0 }, + { X86::MOV8ri, X86::MOV8mi, 0, 0 }, + { X86::MOV8rr, X86::MOV8mr, 0, 0 }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, + { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, + { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, + { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, + { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, + { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, + { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, + { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::MUL16r, X86::MUL16m, 1, 0 }, + { X86::MUL32r, X86::MUL32m, 1, 0 }, + { X86::MUL64r, X86::MUL64m, 1, 0 }, + { X86::MUL8r, X86::MUL8m, 1, 0 }, + { X86::SETAEr, X86::SETAEm, 0, 0 }, + { X86::SETAr, X86::SETAm, 0, 0 }, + { X86::SETBEr, X86::SETBEm, 0, 0 }, + { X86::SETBr, X86::SETBm, 0, 0 }, + { X86::SETEr, X86::SETEm, 0, 0 }, + { X86::SETGEr, X86::SETGEm, 0, 0 }, + { X86::SETGr, X86::SETGm, 0, 0 }, + { X86::SETLEr, X86::SETLEm, 0, 0 }, + { X86::SETLr, X86::SETLm, 0, 0 }, + { X86::SETNEr, X86::SETNEm, 0, 0 }, + { X86::SETNOr, X86::SETNOm, 0, 0 }, + { X86::SETNPr, X86::SETNPm, 0, 0 }, + { X86::SETNSr, X86::SETNSm, 0, 0 }, + { X86::SETOr, X86::SETOm, 0, 0 }, + { X86::SETPr, X86::SETPm, 0, 0 }, + { X86::SETSr, X86::SETSm, 0, 0 }, + { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, + { X86::TEST16ri, X86::TEST16mi, 1, 0 }, + { X86::TEST32ri, X86::TEST32mi, 1, 0 }, + { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, + { X86::TEST8ri, X86::TEST8mi, 1, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { unsigned RegOp = OpTbl0[i][0]; unsigned MemOp = OpTbl0[i][1]; + unsigned Align = OpTbl0[i][3]; if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); unsigned FoldedLoad = OpTbl0[i][2]; // Index 0, folded load or store. 
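Note on the scheme above (editorial sketch, not part of the patch): each RegOp2MemOpTable* now maps a register-form opcode to a (memory-form opcode, minimum memory-operand alignment) pair, while the reverse MemOp2RegOpTable packs its auxiliary data into one word: the low four bits hold the operand index being folded, bit 4 marks a folded load, and bit 5 a folded store. A minimal C++ sketch of how such a word would be unpacked; decodeAuxInfo is a hypothetical helper name, not an LLVM API:

  static void decodeAuxInfo(unsigned AuxInfo, unsigned &OpIndex,
                            bool &FoldedLoad, bool &FoldedStore) {
    OpIndex     = AuxInfo & 0xf;       // which operand the memory form replaces
    FoldedLoad  = (AuxInfo >> 4) & 1;  // entry folds a load
    FoldedStore = (AuxInfo >> 5) & 1;  // entry folds a store
  }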
@@ -317,333 +322,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl1[][2] = { - { X86::CMP16rr, X86::CMP16rm }, - { X86::CMP32rr, X86::CMP32rm }, - { X86::CMP64rr, X86::CMP64rm }, - { X86::CMP8rr, X86::CMP8rm }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, - { X86::FsMOVAPDrr, X86::MOVSDrm }, - { X86::FsMOVAPSrr, X86::MOVSSrm }, - { X86::IMUL16rri, X86::IMUL16rmi }, - { X86::IMUL16rri8, X86::IMUL16rmi8 }, - { X86::IMUL32rri, X86::IMUL32rmi }, - { X86::IMUL32rri8, X86::IMUL32rmi8 }, - { X86::IMUL64rri32, X86::IMUL64rmi32 }, - { X86::IMUL64rri8, X86::IMUL64rmi8 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, - { X86::Int_COMISDrr, X86::Int_COMISDrm }, - { X86::Int_COMISSrr, X86::Int_COMISSrm }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, - { X86::MOV16rr, X86::MOV16rm }, - { X86::MOV16to16_, X86::MOV16_rm }, - { X86::MOV32rr, X86::MOV32rm }, - { X86::MOV32to32_, X86::MOV32_rm }, - { X86::MOV64rr, X86::MOV64rm }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, - { X86::MOV64toSDrr, X86::MOV64toSDrm }, - { X86::MOV8rr, X86::MOV8rm }, - { X86::MOVAPDrr, X86::MOVAPDrm }, - { X86::MOVAPSrr, X86::MOVAPSrm }, - { X86::MOVDDUPrr, X86::MOVDDUPrm }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, - { X86::MOVSDrr, X86::MOVSDrm }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, - { X86::MOVSSrr, X86::MOVSSrm }, - { X86::MOVSX16rr8, X86::MOVSX16rm8 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8 }, - { X86::MOVSX64rr16, X86::MOVSX64rm16 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8 }, - { X86::MOVUPDrr, X86::MOVUPDrm }, - { X86::MOVUPSrr, X86::MOVUPSrm }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, - { 
X86::MOVZX16rr8, X86::MOVZX16rm8 }, - { X86::MOVZX32rr16, X86::MOVZX32rm16 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16 }, - { X86::MOVZX64rr32, X86::MOVZX64rm32 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8 }, - { X86::PSHUFDri, X86::PSHUFDmi }, - { X86::PSHUFHWri, X86::PSHUFHWmi }, - { X86::PSHUFLWri, X86::PSHUFLWmi }, - { X86::RCPPSr, X86::RCPPSm }, - { X86::RCPPSr_Int, X86::RCPPSm_Int }, - { X86::RSQRTPSr, X86::RSQRTPSm }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, - { X86::RSQRTSSr, X86::RSQRTSSm }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, - { X86::SQRTPDr, X86::SQRTPDm }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, - { X86::SQRTPSr, X86::SQRTPSm }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, - { X86::SQRTSDr, X86::SQRTSDm }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, - { X86::SQRTSSr, X86::SQRTSSm }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, - { X86::TEST16rr, X86::TEST16rm }, - { X86::TEST32rr, X86::TEST32rm }, - { X86::TEST64rr, X86::TEST64rm }, - { X86::TEST8rr, X86::TEST8rm }, + static const unsigned OpTbl1[][3] = { + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, + { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, + { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, + { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, + { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, + { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, X86::MOV64rm, 
0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, + { X86::MOVSDrr, X86::MOVSDrm, 0 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, + { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, + { X86::MOVSSrr, X86::MOVSSrm, 0 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 16 }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PSHUFDri, X86::PSHUFDmi, 16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, + { X86::RCPPSr, X86::RCPPSm, 16 }, + { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, + { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, + { X86::SQRTPDr, X86::SQRTPDm, 16 }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, + { X86::SQRTPSr, X86::SQRTPSm, 16 }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm }, - { X86::UCOMISSrr, X86::UCOMISSrm } + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; unsigned MemOp = OpTbl1[i][1]; + unsigned Align = OpTbl1[i][2]; if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load + // Index 1, folded load + unsigned AuxInfo = 1 | (1 << 4); if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl2[][2] = { - { X86::ADC32rr, X86::ADC32rm }, - { X86::ADC64rr, X86::ADC64rm }, - { X86::ADD16rr, X86::ADD16rm }, - { X86::ADD32rr, X86::ADD32rm }, - { X86::ADD64rr, X86::ADD64rm }, - { X86::ADD8rr, X86::ADD8rm }, - { X86::ADDPDrr, X86::ADDPDrm }, - { X86::ADDPSrr, X86::ADDPSrm }, - { X86::ADDSDrr, X86::ADDSDrm }, - { X86::ADDSSrr, X86::ADDSSrm }, - { X86::ADDSUBPDrr, X86::ADDSUBPDrm 
}, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, - { X86::AND16rr, X86::AND16rm }, - { X86::AND32rr, X86::AND32rm }, - { X86::AND64rr, X86::AND64rm }, - { X86::AND8rr, X86::AND8rm }, - { X86::ANDNPDrr, X86::ANDNPDrm }, - { X86::ANDNPSrr, X86::ANDNPSrm }, - { X86::ANDPDrr, X86::ANDPDrm }, - { X86::ANDPSrr, X86::ANDPSrm }, - { X86::CMOVA16rr, X86::CMOVA16rm }, - { X86::CMOVA32rr, X86::CMOVA32rm }, - { X86::CMOVA64rr, X86::CMOVA64rm }, - { X86::CMOVAE16rr, X86::CMOVAE16rm }, - { X86::CMOVAE32rr, X86::CMOVAE32rm }, - { X86::CMOVAE64rr, X86::CMOVAE64rm }, - { X86::CMOVB16rr, X86::CMOVB16rm }, - { X86::CMOVB32rr, X86::CMOVB32rm }, - { X86::CMOVB64rr, X86::CMOVB64rm }, - { X86::CMOVBE16rr, X86::CMOVBE16rm }, - { X86::CMOVBE32rr, X86::CMOVBE32rm }, - { X86::CMOVBE64rr, X86::CMOVBE64rm }, - { X86::CMOVE16rr, X86::CMOVE16rm }, - { X86::CMOVE32rr, X86::CMOVE32rm }, - { X86::CMOVE64rr, X86::CMOVE64rm }, - { X86::CMOVG16rr, X86::CMOVG16rm }, - { X86::CMOVG32rr, X86::CMOVG32rm }, - { X86::CMOVG64rr, X86::CMOVG64rm }, - { X86::CMOVGE16rr, X86::CMOVGE16rm }, - { X86::CMOVGE32rr, X86::CMOVGE32rm }, - { X86::CMOVGE64rr, X86::CMOVGE64rm }, - { X86::CMOVL16rr, X86::CMOVL16rm }, - { X86::CMOVL32rr, X86::CMOVL32rm }, - { X86::CMOVL64rr, X86::CMOVL64rm }, - { X86::CMOVLE16rr, X86::CMOVLE16rm }, - { X86::CMOVLE32rr, X86::CMOVLE32rm }, - { X86::CMOVLE64rr, X86::CMOVLE64rm }, - { X86::CMOVNE16rr, X86::CMOVNE16rm }, - { X86::CMOVNE32rr, X86::CMOVNE32rm }, - { X86::CMOVNE64rr, X86::CMOVNE64rm }, - { X86::CMOVNP16rr, X86::CMOVNP16rm }, - { X86::CMOVNP32rr, X86::CMOVNP32rm }, - { X86::CMOVNP64rr, X86::CMOVNP64rm }, - { X86::CMOVNS16rr, X86::CMOVNS16rm }, - { X86::CMOVNS32rr, X86::CMOVNS32rm }, - { X86::CMOVNS64rr, X86::CMOVNS64rm }, - { X86::CMOVP16rr, X86::CMOVP16rm }, - { X86::CMOVP32rr, X86::CMOVP32rm }, - { X86::CMOVP64rr, X86::CMOVP64rm }, - { X86::CMOVS16rr, X86::CMOVS16rm }, - { X86::CMOVS32rr, X86::CMOVS32rm }, - { X86::CMOVS64rr, X86::CMOVS64rm }, - { X86::CMPPDrri, X86::CMPPDrmi }, - { X86::CMPPSrri, X86::CMPPSrmi }, - { X86::CMPSDrr, X86::CMPSDrm }, - { X86::CMPSSrr, X86::CMPSSrm }, - { X86::DIVPDrr, X86::DIVPDrm }, - { X86::DIVPSrr, X86::DIVPSrm }, - { X86::DIVSDrr, X86::DIVSDrm }, - { X86::DIVSSrr, X86::DIVSSrm }, - { X86::FsANDNPDrr, X86::FsANDNPDrm }, - { X86::FsANDNPSrr, X86::FsANDNPSrm }, - { X86::FsANDPDrr, X86::FsANDPDrm }, - { X86::FsANDPSrr, X86::FsANDPSrm }, - { X86::FsORPDrr, X86::FsORPDrm }, - { X86::FsORPSrr, X86::FsORPSrm }, - { X86::FsXORPDrr, X86::FsXORPDrm }, - { X86::FsXORPSrr, X86::FsXORPSrm }, - { X86::HADDPDrr, X86::HADDPDrm }, - { X86::HADDPSrr, X86::HADDPSrm }, - { X86::HSUBPDrr, X86::HSUBPDrm }, - { X86::HSUBPSrr, X86::HSUBPSrm }, - { X86::IMUL16rr, X86::IMUL16rm }, - { X86::IMUL32rr, X86::IMUL32rm }, - { X86::IMUL64rr, X86::IMUL64rm }, - { X86::MAXPDrr, X86::MAXPDrm }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, - { X86::MAXPSrr, X86::MAXPSrm }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, - { X86::MAXSDrr, X86::MAXSDrm }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, - { X86::MAXSSrr, X86::MAXSSrm }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, - { X86::MINPDrr, X86::MINPDrm }, - { X86::MINPDrr_Int, X86::MINPDrm_Int }, - { X86::MINPSrr, X86::MINPSrm }, - { X86::MINPSrr_Int, X86::MINPSrm_Int }, - { X86::MINSDrr, X86::MINSDrm }, - { X86::MINSDrr_Int, X86::MINSDrm_Int }, - { X86::MINSSrr, X86::MINSSrm }, - { X86::MINSSrr_Int, X86::MINSSrm_Int }, - { X86::MULPDrr, X86::MULPDrm }, - { X86::MULPSrr, X86::MULPSrm }, - { X86::MULSDrr, X86::MULSDrm }, - { X86::MULSSrr, X86::MULSSrm }, - { X86::OR16rr, 
X86::OR16rm }, - { X86::OR32rr, X86::OR32rm }, - { X86::OR64rr, X86::OR64rm }, - { X86::OR8rr, X86::OR8rm }, - { X86::ORPDrr, X86::ORPDrm }, - { X86::ORPSrr, X86::ORPSrm }, - { X86::PACKSSDWrr, X86::PACKSSDWrm }, - { X86::PACKSSWBrr, X86::PACKSSWBrm }, - { X86::PACKUSWBrr, X86::PACKUSWBrm }, - { X86::PADDBrr, X86::PADDBrm }, - { X86::PADDDrr, X86::PADDDrm }, - { X86::PADDQrr, X86::PADDQrm }, - { X86::PADDSBrr, X86::PADDSBrm }, - { X86::PADDSWrr, X86::PADDSWrm }, - { X86::PADDWrr, X86::PADDWrm }, - { X86::PANDNrr, X86::PANDNrm }, - { X86::PANDrr, X86::PANDrm }, - { X86::PAVGBrr, X86::PAVGBrm }, - { X86::PAVGWrr, X86::PAVGWrm }, - { X86::PCMPEQBrr, X86::PCMPEQBrm }, - { X86::PCMPEQDrr, X86::PCMPEQDrm }, - { X86::PCMPEQWrr, X86::PCMPEQWrm }, - { X86::PCMPGTBrr, X86::PCMPGTBrm }, - { X86::PCMPGTDrr, X86::PCMPGTDrm }, - { X86::PCMPGTWrr, X86::PCMPGTWrm }, - { X86::PINSRWrri, X86::PINSRWrmi }, - { X86::PMADDWDrr, X86::PMADDWDrm }, - { X86::PMAXSWrr, X86::PMAXSWrm }, - { X86::PMAXUBrr, X86::PMAXUBrm }, - { X86::PMINSWrr, X86::PMINSWrm }, - { X86::PMINUBrr, X86::PMINUBrm }, - { X86::PMULDQrr, X86::PMULDQrm }, - { X86::PMULDQrr_int, X86::PMULDQrm_int }, - { X86::PMULHUWrr, X86::PMULHUWrm }, - { X86::PMULHWrr, X86::PMULHWrm }, - { X86::PMULLDrr, X86::PMULLDrm }, - { X86::PMULLDrr_int, X86::PMULLDrm_int }, - { X86::PMULLWrr, X86::PMULLWrm }, - { X86::PMULUDQrr, X86::PMULUDQrm }, - { X86::PORrr, X86::PORrm }, - { X86::PSADBWrr, X86::PSADBWrm }, - { X86::PSLLDrr, X86::PSLLDrm }, - { X86::PSLLQrr, X86::PSLLQrm }, - { X86::PSLLWrr, X86::PSLLWrm }, - { X86::PSRADrr, X86::PSRADrm }, - { X86::PSRAWrr, X86::PSRAWrm }, - { X86::PSRLDrr, X86::PSRLDrm }, - { X86::PSRLQrr, X86::PSRLQrm }, - { X86::PSRLWrr, X86::PSRLWrm }, - { X86::PSUBBrr, X86::PSUBBrm }, - { X86::PSUBDrr, X86::PSUBDrm }, - { X86::PSUBSBrr, X86::PSUBSBrm }, - { X86::PSUBSWrr, X86::PSUBSWrm }, - { X86::PSUBWrr, X86::PSUBWrm }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm }, - { X86::PXORrr, X86::PXORrm }, - { X86::SBB32rr, X86::SBB32rm }, - { X86::SBB64rr, X86::SBB64rm }, - { X86::SHUFPDrri, X86::SHUFPDrmi }, - { X86::SHUFPSrri, X86::SHUFPSrmi }, - { X86::SUB16rr, X86::SUB16rm }, - { X86::SUB32rr, X86::SUB32rm }, - { X86::SUB64rr, X86::SUB64rm }, - { X86::SUB8rr, X86::SUB8rm }, - { X86::SUBPDrr, X86::SUBPDrm }, - { X86::SUBPSrr, X86::SUBPSrm }, - { X86::SUBSDrr, X86::SUBSDrm }, - { X86::SUBSSrr, X86::SUBSSrm }, + static const unsigned OpTbl2[][3] = { + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, 16 }, + { X86::ADDPSrr, X86::ADDPSrm, 16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, + { X86::ANDPDrr, X86::ANDPDrm, 16 }, + { X86::ANDPSrr, X86::ANDPSrm, 16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, 
X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, 16 }, + { X86::CMPPSrri, X86::CMPPSrmi, 16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, 16 }, + { X86::DIVPSrr, X86::DIVPSrm, 16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, + { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, + { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, + { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, + { X86::FsORPDrr, X86::FsORPDrm, 16 }, + { X86::FsORPSrr, X86::FsORPSrm, 16 }, + { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, + { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, + { X86::HADDPDrr, X86::HADDPDrm, 16 }, + { X86::HADDPSrr, X86::HADDPSrm, 16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::MAXPDrr, X86::MAXPDrm, 16 }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, + { X86::MAXPSrr, X86::MAXPSrm, 16 }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, + { X86::MINPDrr, X86::MINPDrm, 16 }, + { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, + { X86::MINPSrr, X86::MINPSrm, 16 }, + { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MULPDrr, 
X86::MULPDrm, 16 }, + { X86::MULPSrr, X86::MULPSrm, 16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, 16 }, + { X86::ORPSrr, X86::ORPSrm, 16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, + { X86::PADDBrr, X86::PADDBrm, 16 }, + { X86::PADDDrr, X86::PADDDrm, 16 }, + { X86::PADDQrr, X86::PADDQrm, 16 }, + { X86::PADDSBrr, X86::PADDSBrm, 16 }, + { X86::PADDSWrr, X86::PADDSWrm, 16 }, + { X86::PADDWrr, X86::PADDWrm, 16 }, + { X86::PANDNrr, X86::PANDNrm, 16 }, + { X86::PANDrr, X86::PANDrm, 16 }, + { X86::PAVGBrr, X86::PAVGBrm, 16 }, + { X86::PAVGWrr, X86::PAVGWrm, 16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, + { X86::PINSRWrri, X86::PINSRWrmi, 16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, + { X86::PMINSWrr, X86::PMINSWrm, 16 }, + { X86::PMINUBrr, X86::PMINUBrm, 16 }, + { X86::PMULDQrr, X86::PMULDQrm, 16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, + { X86::PMULHWrr, X86::PMULHWrm, 16 }, + { X86::PMULLDrr, X86::PMULLDrm, 16 }, + { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, + { X86::PMULLWrr, X86::PMULLWrm, 16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, + { X86::PORrr, X86::PORrm, 16 }, + { X86::PSADBWrr, X86::PSADBWrm, 16 }, + { X86::PSLLDrr, X86::PSLLDrm, 16 }, + { X86::PSLLQrr, X86::PSLLQrm, 16 }, + { X86::PSLLWrr, X86::PSLLWrm, 16 }, + { X86::PSRADrr, X86::PSRADrm, 16 }, + { X86::PSRAWrr, X86::PSRAWrm, 16 }, + { X86::PSRLDrr, X86::PSRLDrm, 16 }, + { X86::PSRLQrr, X86::PSRLQrm, 16 }, + { X86::PSRLWrr, X86::PSRLWrm, 16 }, + { X86::PSUBBrr, X86::PSUBBrm, 16 }, + { X86::PSUBDrr, X86::PSUBDrm, 16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, + { X86::PSUBWrr, X86::PSUBWrm, 16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, + { X86::PXORrr, X86::PXORrm, 16 }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, 16 }, + { X86::SUBPSrr, X86::SUBPSrm, 16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, // FIXME: TEST*rr -> swapped operand of TEST*mr. 
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, - { X86::XOR16rr, X86::XOR16rm }, - { X86::XOR32rr, X86::XOR32rm }, - { X86::XOR64rr, X86::XOR64rm }, - { X86::XOR8rr, X86::XOR8rm }, - { X86::XORPDrr, X86::XORPDrm }, - { X86::XORPSrr, X86::XORPSrm } + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, 16 }, + { X86::XORPSrr, X86::XORPSrm, 16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; unsigned MemOp = OpTbl2[i][1]; + unsigned Align = OpTbl2[i][2]; if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 2 | (1 << 4); // Index 1, folded load + // Index 2, folded load + unsigned AuxInfo = 2 | (1 << 4); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); @@ -654,17 +668,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) } bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg) const { + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { switch (MI.getOpcode()) { default: return false; case X86::MOV8rr: + case X86::MOV8rr_NOREX: case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOV16to16_: - case X86::MOV32to32_: case X86::MOVSSrr: case X86::MOVSDrr: @@ -677,18 +690,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: + case X86::MOVDQArr: case X86::MOVSS2PSrr: case X86::MOVSD2PDrr: case X86::MOVPS2SSrr: case X86::MOVPD2SDrr: - case X86::MMX_MOVD64rr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "invalid register-register move instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); return true; } } @@ -699,15 +714,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, default: break; case X86::MOV8rm: case X86::MOV16rm: - case X86::MOV16_rm: case X86::MOV32rm: - case X86::MOV32_rm: case X86::MOV64rm: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: case X86::MOVAPSrm: case X86::MOVAPDrm: + case X86::MOVDQArm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && @@ -729,15 +743,14 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, default: break; case X86::MOV8mr: case X86::MOV16mr: - case X86::MOV16_mr: case X86::MOV32mr: - case X86::MOV32_mr: case X86::MOV64mr: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: case X86::MOVAPSmr: case X86::MOVAPDmr: + case X86::MOVDQAmr: case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: @@ -747,14 +760,13 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, MI->getOperand(2).getReg() == 0 && 
MI->getOperand(3).getImm() == 0) { FrameIndex = MI->getOperand(0).getIndex(); - return MI->getOperand(4).getReg(); + return MI->getOperand(X86AddrNumOperands).getReg(); } break; } return 0; } - /// regIsPICBase - Return true if register is PIC base (e.g. defined by /// X86::MOVPC32r). static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -770,10 +782,27 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -/// isGVStub - Return true if the GV requires an extra load to get the -/// real address. -static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) { - return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false); +/// CanRematLoadWithDispOperand - Return true if a load with the specified +/// operand is a candidate for remat: for this to be true we need to know that +/// the load will always return the same value, even if moved. +static bool CanRematLoadWithDispOperand(const MachineOperand &MO, + X86TargetMachine &TM) { + // Loads from constant pool entries can be remat'd. + if (MO.isCPI()) return true; + + // We can remat globals in some cases. + if (MO.isGlobal()) { + // If this is a load of a stub, not of the global, we can remat it. This + // access will always return the address of the global. + if (isGlobalStubReference(MO.getTargetFlags())) + return true; + + // If the global itself is constant, we can remat the load. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) + if (GV->isConstant()) + return true; + } + return false; } bool @@ -782,26 +811,23 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { default: break; case X86::MOV8rm: case X86::MOV16rm: - case X86::MOV16_rm: case X86::MOV32rm: - case X86::MOV32_rm: case X86::MOV64rm: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: case X86::MOVAPSrm: case X86::MOVAPDrm: + case X86::MOVDQArm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: { // Loads from constant pools are trivially rematerializable. if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - (MI->getOperand(4).isCPI() || - (MI->getOperand(4).isGlobal() && - isGVStub(MI->getOperand(4).getGlobal(), TM)))) { + CanRematLoadWithDispOperand(MI->getOperand(4), TM)) { unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) + if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) @@ -888,11 +914,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const { - unsigned SubIdx = Orig->getOperand(0).isReg() - ? Orig->getOperand(0).getSubReg() : 0; - bool ChangeSubIdx = SubIdx != 0; + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = RI.getSubReg(DestReg, SubIdx); SubIdx = 0; @@ -900,39 +926,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects.
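// Editorial illustration, not part of the patch: the hazard guarded
// against below is that "xorl %eax, %eax" writes EFLAGS, so blindly
// re-materializing an xor-based MOV32r0 between a compare and its
// conditional branch would corrupt the branch:
//   cmpl %ecx, %edx   ; sets EFLAGS
//   xorl %eax, %eax   ; unsafe here: clobbers EFLAGS
//   movl $0, %eax     ; safe alternative: leaves EFLAGS untouched
//   je   target       ; still observes the CMP result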
- bool Emitted = false; - switch (Orig->getOpcode()) { + bool Clone = true; + unsigned Opc = Orig->getOpcode(); + switch (Opc) { default: break; case X86::MOV8r0: case X86::MOV16r0: - case X86::MOV32r0: - case X86::MOV64r0: { + case X86::MOV32r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { - unsigned Opc = 0; - switch (Orig->getOpcode()) { + switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri32; break; } - BuildMI(MBB, I, get(Opc), DestReg).addImm(0); - Emitted = true; + Clone = false; } break; } } - if (!Emitted) { + if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); + } else { + BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); } - if (ChangeSubIdx) { - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); - } + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); } /// isInvariantLoad - Return true if the specified instruction (which is marked @@ -956,7 +979,7 @@ bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const { return true; if (MO.isGlobal()) - return isGVStub(MO.getGlobal(), TM); + return isGlobalStubReference(MO.getTargetFlags()); // If this is a load from an invariant stack slot, the load is a constant. if (MO.isFI()) { @@ -1023,8 +1046,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (B != C) return 0; unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); - NewMI = BuildMI(MF, get(X86::PSHUFDri)).addReg(A, true, false, false, isDead) - .addReg(B, false, false, isKill).addImm(M); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) + .addReg(A, RegState::Define | getDeadRegState(isDead)) + .addReg(B, getKillRegState(isKill)).addImm(M); break; } case X86::SHL64ri: { @@ -1034,8 +1058,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; - NewMI = BuildMI(MF, get(X86::LEA64r)).addReg(Dest, true, false, false, isDead) - .addReg(0).addImm(1 << ShAmt).addReg(Src, false, false, isKill).addImm(0); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(0).addImm(1 << ShAmt) + .addReg(Src, getKillRegState(isKill)) + .addImm(0); break; } case X86::SHL32ri: { @@ -1047,9 +1074,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ? X86::LEA64_32r : X86::LEA32r; - NewMI = BuildMI(MF, get(Opc)).addReg(Dest, true, false, false, isDead) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) - .addReg(Src, false, false, isKill).addImm(0); + .addReg(Src, getKillRegState(isKill)).addImm(0); break; } case X86::SHL16ri: { @@ -1069,17 +1097,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Build and insert into an implicit UNDEF value. This is OK because // we'll be shifting and then extracting the lower 16-bits.
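// Editorial sketch of the widening dance below, not part of the patch:
// there is no 16-bit LEA, so the 16-bit shift is performed in 32 bits
// through the LEA scale field and then truncated back. Reading the
// BuildMI operand order below as base, scale, index, displacement:
//   %in32  = INSERT_SUBREG undef, %src16, X86::SUBREG_16BIT
//   %out32 = LEA32r base=0, scale=(1 << ShAmt), index=%in32, disp=0
//   %dst16 = EXTRACT_SUBREG %out32, X86::SUBREG_16BIT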
- BuildMI(*MFI, MBBI, get(X86::IMPLICIT_DEF), leaInReg); - MachineInstr *InsMI = BuildMI(*MFI, MBBI, get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg).addReg(Src, false, false, isKill) + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); + MachineInstr *InsMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) + .addReg(leaInReg) + .addReg(Src, getKillRegState(isKill)) .addImm(X86::SUBREG_16BIT); - NewMI = BuildMI(*MFI, MBBI, get(Opc), leaOutReg).addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, false, false, true).addImm(0); + NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg) + .addReg(0).addImm(1 << ShAmt) + .addReg(leaInReg, RegState::Kill) + .addImm(0); - MachineInstr *ExtMI = BuildMI(*MFI, MBBI, get(X86::EXTRACT_SUBREG)) - .addReg(Dest, true, false, false, isDead) - .addReg(leaOutReg, false, false, true).addImm(X86::SUBREG_16BIT); + MachineInstr *ExtMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(leaOutReg, RegState::Kill) + .addImm(X86::SUBREG_16BIT); + if (LV) { // Update live variables LV->getVarInfo(leaInReg).Kills.push_back(NewMI); @@ -1091,9 +1126,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } return ExtMI; } else { - NewMI = BuildMI(MF, get(X86::LEA16r)).addReg(Dest, true, false, false, isDead) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) - .addReg(Src, false, false, isKill).addImm(0); + .addReg(Src, getKillRegState(isKill)) + .addImm(0); } break; } @@ -1108,39 +1145,45 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, switch (MIOpc) { default: return 0; case X86::INC64r: - case X86::INC32r: { + case X86::INC32r: + case X86::INC64_32r: { assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addRegOffset(BuildMI(MF, get(Opc)) - .addReg(Dest, true, false, false, isDead), - Src, isKill, 1); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, 1); break; } case X86::INC16r: case X86::INC64_16r: if (DisableLEA16) return 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); - NewMI = addRegOffset(BuildMI(MF, get(X86::LEA16r)) - .addReg(Dest, true, false, false, isDead), + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, 1); break; case X86::DEC64r: - case X86::DEC32r: { + case X86::DEC32r: + case X86::DEC64_32r: { assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? 
X86::LEA64_32r : X86::LEA32r); - NewMI = addRegOffset(BuildMI(MF, get(Opc)) - .addReg(Dest, true, false, false, isDead), - Src, isKill, -1); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, -1); break; } case X86::DEC16r: case X86::DEC64_16r: if (DisableLEA16) return 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); - NewMI = addRegOffset(BuildMI(MF, get(X86::LEA16r)) - .addReg(Dest, true, false, false, isDead), + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, -1); break; case X86::ADD64rr: @@ -1150,8 +1193,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, : (is64Bit ? X86::LEA64_32r : X86::LEA32r); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); - NewMI = addRegReg(BuildMI(MF, get(Opc)) - .addReg(Dest, true, false, false, isDead), + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, Src2, isKill2); if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); @@ -1162,8 +1206,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); - NewMI = addRegReg(BuildMI(MF, get(X86::LEA16r)) - .addReg(Dest, true, false, false, isDead), + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, Src2, isKill2); if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); @@ -1173,18 +1218,20 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); if (MI->getOperand(2).isImm()) - NewMI = addRegOffset(BuildMI(MF, get(X86::LEA64r)) - .addReg(Dest, true, false, false, isDead), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: case X86::ADD32ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); if (MI->getOperand(2).isImm()) { unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addRegOffset(BuildMI(MF, get(Opc)) - .addReg(Dest, true, false, false, isDead), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); } break; case X86::ADD16ri: @@ -1192,8 +1239,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); if (MI->getOperand(2).isImm()) - NewMI = addRegOffset(BuildMI(MF, get(X86::LEA16r)) - .addReg(Dest, true, false, false, isDead), + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); break; case X86::SHL16ri: @@ -1210,8 +1258,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r : (MIOpc == X86::SHL32ri ? 
(is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r); - NewMI = addFullAddress(BuildMI(MF, get(Opc)) - .addReg(Dest, true, false, false, isDead), AM); + NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), AM); if (isKill) NewMI->getOperand(3).setIsKill(true); } @@ -1249,7 +1298,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned Opc; unsigned Size; switch (MI->getOpcode()) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; @@ -1308,7 +1357,13 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::CMOVP64rr: case X86::CMOVNP16rr: case X86::CMOVNP32rr: - case X86::CMOVNP64rr: { + case X86::CMOVNP64rr: + case X86::CMOVO16rr: + case X86::CMOVO32rr: + case X86::CMOVO64rr: + case X86::CMOVNO16rr: + case X86::CMOVNO32rr: + case X86::CMOVNO64rr: { unsigned Opc = 0; switch (MI->getOpcode()) { default: break; @@ -1344,16 +1399,22 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break; case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break; case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break; - case X86::CMOVS64rr: Opc = X86::CMOVNS32rr; break; + case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break; case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break; case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break; case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break; case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break; case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break; - case X86::CMOVP64rr: Opc = X86::CMOVNP32rr; break; + case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break; case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break; case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break; case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break; + case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break; + case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break; + case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break; + case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break; + case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break; + case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break; } if (NewMI) { MachineFunction &MF = *MI->getParent()->getParent(); @@ -1387,14 +1448,12 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { case X86::JNP: return X86::COND_NP; case X86::JO: return X86::COND_O; case X86::JNO: return X86::COND_NO; - case X86::JC: return X86::COND_C; - case X86::JNC: return X86::COND_NC; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::JE; case X86::COND_NE: return X86::JNE; case X86::COND_L: return X86::JL; @@ -1411,8 +1470,6 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { case X86::COND_NP: return X86::JNP; case X86::COND_O: return X86::JO; case X86::COND_NO: return X86::JNO; - case X86::COND_C: return X86::JC; - case X86::COND_NC: return X86::JNC; } } @@ -1420,7 +1477,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { /// e.g. turning COND_E to COND_NE. 
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::COND_NE; case X86::COND_NE: return X86::COND_E; case X86::COND_L: return X86::COND_GE; @@ -1437,8 +1494,6 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { case X86::COND_NP: return X86::COND_P; case X86::COND_O: return X86::COND_NO; case X86::COND_NO: return X86::COND_O; - case X86::COND_C: return X86::COND_NC; - case X86::COND_NC: return X86::COND_C; } } @@ -1465,7 +1520,8 @@ static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI, bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond) const { + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { // Start from the bottom of the block and work up, examining the // terminator instructions. MachineBasicBlock::iterator I = MBB.end(); @@ -1481,6 +1537,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. if (I->getOpcode() == X86::JMP) { + if (!AllowModify) { + TBB = I->getOperand(0).getMBB(); + continue; + } + // If the block has any instructions after a JMP, delete them. while (next(I) != MBB.end()) next(I)->eraseFromParent(); @@ -1563,33 +1624,12 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } -static const MachineInstrBuilder &X86InstrAddOperand(MachineInstrBuilder &MIB, - const MachineOperand &MO) { - if (MO.isReg()) - MIB = MIB.addReg(MO.getReg(), MO.isDef(), MO.isImplicit(), - MO.isKill(), MO.isDead(), MO.getSubReg()); - else if (MO.isImm()) - MIB = MIB.addImm(MO.getImm()); - else if (MO.isFI()) - MIB = MIB.addFrameIndex(MO.getIndex()); - else if (MO.isGlobal()) - MIB = MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset()); - else if (MO.isCPI()) - MIB = MIB.addConstantPoolIndex(MO.getIndex(), MO.getOffset()); - else if (MO.isJTI()) - MIB = MIB.addJumpTableIndex(MO.getIndex()); - else if (MO.isSymbol()) - MIB = MIB.addExternalSymbol(MO.getSymbolName()); - else - assert(0 && "Unknown operand for X86InstrAddOperand!"); - - return MIB; -} - unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond) const { + // FIXME this should probably have a DebugLoc operand + DebugLoc dl = DebugLoc::getUnknownLoc(); // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -1598,7 +1638,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); return 1; } @@ -1608,99 +1648,154 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); ++Count; - BuildMI(&MBB, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, get(X86::JNE)).addMBB(TBB);
+    BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
     ++Count;
-    BuildMI(&MBB, get(X86::JP)).addMBB(TBB);
+    BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
     ++Count;
     break;
   default: {
     unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, get(Opc)).addMBB(TBB);
+    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
     ++Count;
   }
   }
   if (FBB) {
     // Two-way conditional branch. Insert the second branch.
-    BuildMI(&MBB, get(X86::JMP)).addMBB(FBB);
+    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
     ++Count;
   }
   return Count;
 }
 
+/// isHReg - Test if the given register is a physical h register.
+static bool isHReg(unsigned Reg) {
+  return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
 bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 unsigned DestReg, unsigned SrcReg,
                                 const TargetRegisterClass *DestRC,
                                 const TargetRegisterClass *SrcRC) const {
-  if (DestRC == SrcRC) {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  // Determine if DestRC and SrcRC have a common superclass.
+  const TargetRegisterClass *CommonRC = DestRC;
+  if (DestRC == SrcRC)
+    /* Source and destination have the same register class. */;
+  else if (CommonRC->hasSuperClass(SrcRC))
+    CommonRC = SrcRC;
+  else if (!DestRC->hasSubClass(SrcRC)) {
+    // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other,
+    // but we want to copy them as GR64. Similarly, for GR32_NOREX and
+    // GR32_NOSP, copy as GR32.
+    if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
+        DestRC->hasSuperClass(&X86::GR64RegClass))
+      CommonRC = &X86::GR64RegClass;
+    else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
+             DestRC->hasSuperClass(&X86::GR32RegClass))
+      CommonRC = &X86::GR32RegClass;
+    else
+      CommonRC = 0;
+  }
+
+  if (CommonRC) {
     unsigned Opc;
-    if (DestRC == &X86::GR64RegClass) {
+    if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
+      Opc = X86::MOV64rr;
+    } else if (CommonRC == &X86::GR32RegClass ||
+               CommonRC == &X86::GR32_NOSPRegClass) {
+      Opc = X86::MOV32rr;
+    } else if (CommonRC == &X86::GR16RegClass) {
+      Opc = X86::MOV16rr;
+    } else if (CommonRC == &X86::GR8RegClass) {
+      // Copying to or from a physical H register on x86-64 requires a NOREX
+      // move. Otherwise use a normal move.
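The encoding rule behind this: once a REX prefix is present, the register encodings that previously named AH, CH, DH and BH are reinterpreted as SPL, BPL, SIL and DIL, so a copy touching an H register must stay REX-free. A stand-alone sketch of the selection (strings stand in for the X86::MOV8rr and X86::MOV8rr_NOREX opcode enums):

#include <cassert>
#include <cstring>

static bool isHReg8(const char *R) {
  return !strcmp(R, "AH") || !strcmp(R, "BH") ||
         !strcmp(R, "CH") || !strcmp(R, "DH");
}

// In 64-bit mode an H-register copy must use the form guaranteed not to
// carry a REX prefix; everywhere else the ordinary byte move is fine.
static const char *pickByteMove(const char *Dst, const char *Src, bool Is64Bit) {
  if (Is64Bit && (isHReg8(Dst) || isHReg8(Src)))
    return "MOV8rr_NOREX";
  return "MOV8rr";
}

int main() {
  assert(!strcmp(pickByteMove("AH", "CL", true),  "MOV8rr_NOREX"));
  assert(!strcmp(pickByteMove("AL", "CL", true),  "MOV8rr"));
  assert(!strcmp(pickByteMove("AH", "CL", false), "MOV8rr"));
  return 0;
}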
+ if ((isHReg(DestReg) || isHReg(SrcReg)) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; + else + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR64_ABCDRegClass) { Opc = X86::MOV64rr; - } else if (DestRC == &X86::GR32RegClass) { + } else if (CommonRC == &X86::GR32_ABCDRegClass) { Opc = X86::MOV32rr; - } else if (DestRC == &X86::GR16RegClass) { + } else if (CommonRC == &X86::GR16_ABCDRegClass) { Opc = X86::MOV16rr; - } else if (DestRC == &X86::GR8RegClass) { + } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rr; - } else if (DestRC == &X86::GR32_RegClass) { - Opc = X86::MOV32_rr; - } else if (DestRC == &X86::GR16_RegClass) { - Opc = X86::MOV16_rr; - } else if (DestRC == &X86::RFP32RegClass) { + } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; + else + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::GR64_NOREXRegClass || + CommonRC == &X86::GR64_NOREX_NOSPRegClass) { + Opc = X86::MOV64rr; + } else if (CommonRC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rr; + } else if (CommonRC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rr; + } else if (CommonRC == &X86::GR8_NOREXRegClass) { + Opc = X86::MOV8rr; + } else if (CommonRC == &X86::RFP32RegClass) { Opc = X86::MOV_Fp3232; - } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) { + } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { Opc = X86::MOV_Fp6464; - } else if (DestRC == &X86::RFP80RegClass) { + } else if (CommonRC == &X86::RFP80RegClass) { Opc = X86::MOV_Fp8080; - } else if (DestRC == &X86::FR32RegClass) { + } else if (CommonRC == &X86::FR32RegClass) { Opc = X86::FsMOVAPSrr; - } else if (DestRC == &X86::FR64RegClass) { + } else if (CommonRC == &X86::FR64RegClass) { Opc = X86::FsMOVAPDrr; - } else if (DestRC == &X86::VR128RegClass) { + } else if (CommonRC == &X86::VR128RegClass) { Opc = X86::MOVAPSrr; - } else if (DestRC == &X86::VR64RegClass) { + } else if (CommonRC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64rr; } else { return false; } - BuildMI(MBB, MI, get(Opc), DestReg).addReg(SrcReg); + BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); return true; } - + // Moving EFLAGS to / from another register requires a push and a pop. 
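There is no direct MOV encoding for EFLAGS, so the copy bounces through the stack, as the code below does. A stand-alone sketch of the instruction pairs chosen (strings stand in for the opcode enums; only the width matters):

struct SeqPair { const char *First; const char *Second; };

// EFLAGS -> GPR: push the flags, pop into the destination register.
static SeqPair eflagsToReg(bool Is64Bit) {
  SeqPair P = { Is64Bit ? "PUSHFQ" : "PUSHFD",
                Is64Bit ? "POP64r" : "POP32r" };
  return P;
}

// GPR -> EFLAGS: push the source register, pop into the flags.
static SeqPair regToEFLAGS(bool Is64Bit) {
  SeqPair P = { Is64Bit ? "PUSH64r" : "PUSH32r",
                Is64Bit ? "POPFQ"   : "POPFD" };
  return P;
}

int main() {
  SeqPair A = eflagsToReg(true);  // PUSHFQ; POP64r
  SeqPair B = regToEFLAGS(false); // PUSH32r; POPFD
  (void)A; (void)B;
  return 0;
}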
   if (SrcRC == &X86::CCRRegClass) {
     if (SrcReg != X86::EFLAGS)
       return false;
-    if (DestRC == &X86::GR64RegClass) {
-      BuildMI(MBB, MI, get(X86::PUSHFQ));
-      BuildMI(MBB, MI, get(X86::POP64r), DestReg);
+    if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
+      BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
+      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
       return true;
-    } else if (DestRC == &X86::GR32RegClass) {
-      BuildMI(MBB, MI, get(X86::PUSHFD));
-      BuildMI(MBB, MI, get(X86::POP32r), DestReg);
+    } else if (DestRC == &X86::GR32RegClass ||
+               DestRC == &X86::GR32_NOSPRegClass) {
+      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
       return true;
     }
   } else if (DestRC == &X86::CCRRegClass) {
     if (DestReg != X86::EFLAGS)
       return false;
-    if (SrcRC == &X86::GR64RegClass) {
-      BuildMI(MBB, MI, get(X86::PUSH64r)).addReg(SrcReg);
-      BuildMI(MBB, MI, get(X86::POPFQ));
+    if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) {
+      BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+      BuildMI(MBB, MI, DL, get(X86::POPFQ));
       return true;
-    } else if (SrcRC == &X86::GR32RegClass) {
-      BuildMI(MBB, MI, get(X86::PUSH32r)).addReg(SrcReg);
-      BuildMI(MBB, MI, get(X86::POPFD));
+    } else if (SrcRC == &X86::GR32RegClass ||
+               SrcRC == &X86::GR32_NOSPRegClass) {
+      BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+      BuildMI(MBB, MI, DL, get(X86::POPFD));
       return true;
     }
   }
-  
+
   // Moving from ST(0) turns into FpGET_ST0_32 etc.
   if (SrcRC == &X86::RSTRegClass) {
     // Copying from ST(0)/ST(1).
@@ -1718,27 +1813,28 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
         return false;
       Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
     }
-    BuildMI(MBB, MI, get(Opc), DestReg);
+    BuildMI(MBB, MI, DL, get(Opc), DestReg);
     return true;
   }
 
   // Moving to ST(0) turns into FpSET_ST0_32 etc.
   if (DestRC == &X86::RSTRegClass) {
-    // Copying to ST(0). FIXME: handle ST(1) also
-    if (DestReg != X86::ST0)
+    // Copying to ST(0) / ST(1).
+    if (DestReg != X86::ST0 && DestReg != X86::ST1)
       // Can only copy to TOS right now
       return false;
+    bool isST0 = DestReg == X86::ST0;
     unsigned Opc;
     if (SrcRC == &X86::RFP32RegClass)
-      Opc = X86::FpSET_ST0_32;
+      Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
    else if (SrcRC == &X86::RFP64RegClass)
-      Opc = X86::FpSET_ST0_64;
+      Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
     else {
       if (SrcRC != &X86::RFP80RegClass)
         return false;
-      Opc = X86::FpSET_ST0_80;
+      Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
     }
-    BuildMI(MBB, MI, get(Opc)).addReg(SrcReg);
+    BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
     return true;
   }
 
@@ -1746,21 +1842,47 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   return false;
 }
 
-static unsigned getStoreRegOpcode(const TargetRegisterClass *RC,
-                                  bool isStackAligned) {
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+                                  const TargetRegisterClass *RC,
+                                  bool isStackAligned,
+                                  TargetMachine &TM) {
   unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass) {
+  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
     Opc = X86::MOV64mr;
-  } else if (RC == &X86::GR32RegClass) {
+  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
     Opc = X86::MOV32mr;
   } else if (RC == &X86::GR16RegClass) {
     Opc = X86::MOV16mr;
   } else if (RC == &X86::GR8RegClass) {
+    // Copying to or from a physical H register on x86-64 requires a NOREX
+    // move. Otherwise use a normal move.
+ if (isHReg(SrcReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_ABCDRegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32_ABCDRegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16_ABCDRegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_ABCD_LRegClass) { + Opc = X86::MOV8mr; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8mr_NOREX; + else + Opc = X86::MOV8mr; + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { + Opc = X86::MOV64mr; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32mr; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16mr; + } else if (RC == &X86::GR8_NOREXRegClass) { Opc = X86::MOV8mr; - } else if (RC == &X86::GR32_RegClass) { - Opc = X86::MOV32_mr; - } else if (RC == &X86::GR16_RegClass) { - Opc = X86::MOV16_mr; } else if (RC == &X86::RFP80RegClass) { Opc = X86::ST_FpP80m; // pops } else if (RC == &X86::RFP64RegClass) { @@ -1777,8 +1899,7 @@ static unsigned getStoreRegOpcode(const TargetRegisterClass *RC, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64mr; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -1791,9 +1912,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getStoreRegOpcode(RC, isAligned); - addFrameReference(BuildMI(MBB, MI, get(Opc)), FrameIdx) - .addReg(SrcReg, false, false, isKill); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) + .addReg(SrcReg, getKillRegState(isKill)); } void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, @@ -1803,29 +1926,56 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, SmallVectorImpl &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getStoreRegOpcode(RC, isAligned); - MachineInstrBuilder MIB = BuildMI(MF, get(Opc)); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB = X86InstrAddOperand(MIB, Addr[i]); - MIB.addReg(SrcReg, false, false, isKill); + MIB.addOperand(Addr[i]); + MIB.addReg(SrcReg, getKillRegState(isKill)); NewMIs.push_back(MIB); } -static unsigned getLoadRegOpcode(const TargetRegisterClass *RC, - bool isStackAligned) { +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { unsigned Opc = 0; - if (RC == &X86::GR64RegClass) { + if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass) { + } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32rm; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rm; } else if (RC == &X86::GR8RegClass) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. 
+ if (isHReg(DestReg) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_ABCDRegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32_ABCDRegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16_ABCDRegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_ABCD_LRegClass) { + Opc = X86::MOV8rm; + } else if (RC == &X86::GR8_ABCD_HRegClass) { + if (TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rm_NOREX; + else + Opc = X86::MOV8rm; + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { + Opc = X86::MOV64rm; + } else if (RC == &X86::GR32_NOREXRegClass) { + Opc = X86::MOV32rm; + } else if (RC == &X86::GR16_NOREXRegClass) { + Opc = X86::MOV16rm; + } else if (RC == &X86::GR8_NOREXRegClass) { Opc = X86::MOV8rm; - } else if (RC == &X86::GR32_RegClass) { - Opc = X86::MOV32_rm; - } else if (RC == &X86::GR16_RegClass) { - Opc = X86::MOV16_rm; } else if (RC == &X86::RFP80RegClass) { Opc = X86::LD_Fp80m; } else if (RC == &X86::RFP64RegClass) { @@ -1842,8 +1992,7 @@ static unsigned getLoadRegOpcode(const TargetRegisterClass *RC, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64rm; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -1856,8 +2005,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getLoadRegOpcode(RC, isAligned); - addFrameReference(BuildMI(MBB, MI, get(Opc), DestReg), FrameIdx); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, @@ -1866,83 +2017,115 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - unsigned Opc = getLoadRegOpcode(RC, isAligned); - MachineInstrBuilder MIB = BuildMI(MF, get(Opc), DestReg); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + DebugLoc DL = DebugLoc::getUnknownLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB = X86InstrAddOperand(MIB, Addr[i]); + MIB.addOperand(Addr[i]); NewMIs.push_back(MIB); } bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, + MachineBasicBlock::iterator MI, const std::vector &CSI) const { if (CSI.empty()) return false; + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + bool is64Bit = TM.getSubtarget().is64Bit(); + bool isWin64 = TM.getSubtarget().isTargetWin64(); unsigned SlotSize = is64Bit ? 8 : 4; MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); X86MachineFunctionInfo *X86FI = MF.getInfo(); - X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize); + unsigned CalleeFrameSize = 0; unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. 
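In the loop that follows, only registers spilled with PUSH contribute to CalleeSavedFrameSize; the frame register is skipped entirely, and vector registers (or, on Win64, everything) go through ordinary stack-slot stores instead. A stand-alone model of that accounting (assumed field names, mirroring the code below):

#include <cassert>
#include <vector>

struct SavedReg { bool IsFrameReg; bool IsVector; };

static unsigned pushedFrameBytes(const std::vector<SavedReg> &CSI,
                                 unsigned SlotSize, bool IsWin64) {
  unsigned Bytes = 0;
  for (unsigned i = 0; i != CSI.size(); ++i) {
    if (CSI[i].IsFrameReg) continue;          // left to emitPrologue
    if (CSI[i].IsVector || IsWin64) continue; // storeRegToStackSlot path
    Bytes += SlotSize;                        // PUSH path
  }
  return Bytes;
}

int main() {
  std::vector<SavedReg> CSI;
  SavedReg RBX = { false, false }, RBP = { true, false }, XMM6 = { false, true };
  CSI.push_back(RBX); CSI.push_back(RBP); CSI.push_back(XMM6);
  assert(pushedFrameBytes(CSI, 8, false) == 8); // only RBX is pushed
  return 0;
}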
MBB.addLiveIn(Reg); - BuildMI(MBB, MI, get(Opc)) - .addReg(Reg, /*isDef=*/false, /*isImp=*/false, /*isKill=*/true); + if (Reg == FPReg) + // X86RegisterInfo::emitPrologue will handle spilling of frame register. + continue; + if (RegClass != &X86::VR128RegClass && !isWin64) { + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); + } else { + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); + } } + + X86FI->setCalleeSavedFrameSize(CalleeFrameSize); return true; } bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, + MachineBasicBlock::iterator MI, const std::vector &CSI) const { if (CSI.empty()) return false; - - bool is64Bit = TM.getSubtarget().is64Bit(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); + bool is64Bit = TM.getSubtarget().is64Bit(); + bool isWin64 = TM.getSubtarget().isTargetWin64(); unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - BuildMI(MBB, MI, get(Opc), Reg); + if (Reg == FPReg) + // X86RegisterInfo::emitEpilogue will handle restoring of frame register. + continue; + const TargetRegisterClass *RegClass = CSI[i].getRegClass(); + if (RegClass != &X86::VR128RegClass && !isWin64) { + BuildMI(MBB, MI, DL, get(Opc), Reg); + } else { + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + } } return true; } static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, - const SmallVector &MOs, - MachineInstr *MI, const TargetInstrInfo &TII) { + const SmallVectorImpl &MOs, + MachineInstr *MI, + const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. - MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), true); + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), + MI->getDebugLoc(), true); MachineInstrBuilder MIB(NewMI); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) - MIB = X86InstrAddOperand(MIB, MOs[i]); + MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only - MIB.addImm(1).addReg(0).addImm(0); + addOffset(MIB, 0); // Loop over the rest of the ri operands, converting them over. 
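The fused instruction is assembled memory tuple first: the caller's address operands go in, and when only a frame index was supplied, addOffset fills the remaining scale, index, displacement and segment fields with defaults before the loop below copies over the rest of the original operands. A stand-alone sketch of that tuple completion (assuming the five-part x86 address layout, X86AddrNumOperands == 5):

#include <cassert>
#include <vector>

struct Operand { const char *Kind; int Value; };

// Complete a base-only address to the full 5-tuple, as addOffset does.
static void addDefaultOffset(std::vector<Operand> &Ops) {
  Operand Scale = { "imm", 1 }; // scale factor 1
  Operand Index = { "reg", 0 }; // no index register
  Operand Disp  = { "imm", 0 }; // zero displacement
  Operand Seg   = { "reg", 0 }; // no segment override
  Ops.push_back(Scale); Ops.push_back(Index);
  Ops.push_back(Disp);  Ops.push_back(Seg);
}

int main() {
  std::vector<Operand> Ops;
  Operand FI = { "frameindex", 3 };
  Ops.push_back(FI); // the NumAddrOps < 4 case: frame index only
  addDefaultOffset(Ops);
  assert(Ops.size() == 5); // base, scale, index, disp, segment
  return 0;
}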
unsigned NumOps = MI->getDesc().getNumOperands()-2; for (unsigned i = 0; i != NumOps; ++i) { MachineOperand &MO = MI->getOperand(i+2); - MIB = X86InstrAddOperand(MIB, MO); + MIB.addOperand(MO); } for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - MIB = X86InstrAddOperand(MIB, MO); + MIB.addOperand(MO); } return MIB; } static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, - const SmallVector &MOs, + const SmallVectorImpl &MOs, MachineInstr *MI, const TargetInstrInfo &TII) { - MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), true); + MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), + MI->getDebugLoc(), true); MachineInstrBuilder MIB(NewMI); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1951,35 +2134,36 @@ static MachineInstr *FuseInst(MachineFunction &MF, assert(MO.isReg() && "Expected to fold into reg operand!"); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) - MIB = X86InstrAddOperand(MIB, MOs[i]); + MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only - MIB.addImm(1).addReg(0).addImm(0); + addOffset(MIB, 0); } else { - MIB = X86InstrAddOperand(MIB, MO); + MIB.addOperand(MO); } } return MIB; } static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, - const SmallVector &MOs, + const SmallVectorImpl &MOs, MachineInstr *MI) { MachineFunction &MF = *MI->getParent()->getParent(); - MachineInstrBuilder MIB = BuildMI(MF, TII.get(Opcode)); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) - MIB = X86InstrAddOperand(MIB, MOs[i]); + MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only - MIB.addImm(1).addReg(0).addImm(0); + addOffset(MIB, 0); return MIB.addImm(0); } MachineInstr* -X86InstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr *MI, unsigned i, - const SmallVector &MOs) const{ - const DenseMap *OpcodeTablePtr = NULL; +X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, unsigned i, + const SmallVectorImpl &MOs, + unsigned Align) const { + const DenseMap > *OpcodeTablePtr=NULL; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && @@ -2000,8 +2184,6 @@ X86InstrInfo::foldMemoryOperand(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2017,51 +2199,36 @@ X86InstrInfo::foldMemoryOperand(MachineFunction &MF, // If table selected... 
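The lookup that follows consults a table whose values are now (memory-form opcode, minimum alignment) pairs, so a fold is refused outright when the memory operand's known alignment is below the entry's requirement. A stand-alone sketch with simplified types (std::map standing in for the DenseMap, made-up opcode numbers):

#include <cassert>
#include <map>
#include <utility>

typedef std::map<unsigned, std::pair<unsigned, unsigned> > FoldTable;

static bool tryFold(const FoldTable &Tbl, unsigned RegOpc, unsigned Align,
                    unsigned &MemOpc) {
  FoldTable::const_iterator I = Tbl.find(RegOpc);
  if (I == Tbl.end())
    return false;                 // no memory form known
  if (Align < I->second.second)
    return false;                 // would violate the alignment contract
  MemOpc = I->second.first;
  return true;
}

int main() {
  FoldTable Tbl;
  Tbl[100u] = std::make_pair(200u, 16u); // hypothetical SSE op, needs align 16
  unsigned MemOpc = 0;
  assert(!tryFold(Tbl, 100u, 8u, MemOpc)); // under-aligned: rejected
  assert(tryFold(Tbl, 100u, 16u, MemOpc) && MemOpc == 200u);
  return 0;
}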
if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap::iterator I = + DenseMap >::iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { + unsigned MinAlign = I->second.second; + if (Align < MinAlign) + return NULL; if (isTwoAddrFold) - NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this); + NewMI = FuseTwoAddrInst(MF, I->second.first, MOs, MI, *this); else - NewMI = FuseInst(MF, I->second, i, MOs, MI, *this); + NewMI = FuseInst(MF, I->second.first, i, MOs, MI, *this); return NewMI; } } // No fusion if (PrintFailedFusing) - cerr << "We failed to fuse operand " << i << *MI; + errs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } -MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { +MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { // Check switch flag if (NoFusing) return NULL; const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Alignment = MFI->getObjectAlignment(FrameIndex); - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. - case X86::FsANDNPDrr: - case X86::FsANDNPSrr: - case X86::FsANDPDrr: - case X86::FsANDPSrr: - case X86::FsORPDrr: - case X86::FsORPSrr: - case X86::FsXORPDrr: - case X86::FsXORPSrr: - return NULL; - } - } - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { @@ -2079,13 +2246,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, SmallVector MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperand(MF, MI, Ops[0], MOs); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment); } -MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - MachineInstr *LoadMI) const { +MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + MachineInstr *LoadMI) const { // Check switch flag if (NoFusing) return NULL; @@ -2093,25 +2260,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, unsigned Alignment = 0; if (LoadMI->hasOneMemOperand()) Alignment = LoadMI->memoperands_begin()->getAlignment(); - - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. 
-    case X86::FsANDNPDrr:
-    case X86::FsANDNPSrr:
-    case X86::FsANDPDrr:
-    case X86::FsANDPSrr:
-    case X86::FsORPDrr:
-    case X86::FsORPSrr:
-    case X86::FsXORPDrr:
-    case X86::FsXORPSrr:
-      return NULL;
-    }
-  }
-
+  else if (LoadMI->getOpcode() == X86::V_SET0 ||
+           LoadMI->getOpcode() == X86::V_SETALLONES)
+    Alignment = 16;
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
     switch (MI->getOpcode()) {
@@ -2127,7 +2278,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF,
   } else if (Ops.size() != 1)
     return NULL;
 
-  SmallVector<MachineOperand,4> MOs;
+  SmallVector<MachineOperand,X86AddrNumOperands> MOs;
   if (LoadMI->getOpcode() == X86::V_SET0 ||
       LoadMI->getOpcode() == X86::V_SETALLONES) {
     // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
     // x86-32 PIC requires a PIC base register for constant pools.
     unsigned PICBase = 0;
-    if (TM.getRelocationModel() == Reloc::PIC_ &&
-        !TM.getSubtarget<X86Subtarget>().is64Bit())
-      PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+    if (TM.getRelocationModel() == Reloc::PIC_) {
+      if (TM.getSubtarget<X86Subtarget>().is64Bit())
+        PICBase = X86::RIP;
+      else
+        // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+        // This doesn't work for several reasons.
+        // 1. GlobalBaseReg may have been spilled.
+        // 2. It may not be live at MI.
+        return NULL;
+    }
 
     // Create a v4i32 constant-pool entry.
     MachineConstantPool &MCP = *MF.getConstantPool();
-    const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
+    const VectorType *Ty =
+          VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
     Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
-                    ConstantVector::getNullValue(Ty) :
-                    ConstantVector::getAllOnesValue(Ty);
-    unsigned CPI = MCP.getConstantPoolIndex(C, /*AlignmentLog2=*/4);
+                    Constant::getNullValue(Ty) :
+                    Constant::getAllOnesValue(Ty);
+    unsigned CPI = MCP.getConstantPoolIndex(C, 16);
 
     // Create operands to load from the constant pool entry.
     MOs.push_back(MachineOperand::CreateReg(PICBase, false));
     MOs.push_back(MachineOperand::CreateImm(1));
     MOs.push_back(MachineOperand::CreateReg(0, false));
     MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+    MOs.push_back(MachineOperand::CreateReg(0, false));
   } else {
     // Folding a normal load. Just copy the load's address operands.
     unsigned NumOps = LoadMI->getDesc().getNumOperands();
-    for (unsigned i = NumOps - 4; i != NumOps; ++i)
+    for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
   }
-  return foldMemoryOperand(MF, MI, Ops[0], MOs);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
 }
 
 
@@ -2190,15 +2350,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
   // replacing the *two* registers with the memory location.
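For the two-address case the tables record the fold under index 0 with both the load and the store bit set, since the memory location is read and rewritten. A stand-alone sketch of the AuxInfo packing used throughout these tables (the layout is visible in the decode below: low four bits for the operand index, bit 4 for a folded load, bit 5 for a folded store):

#include <cassert>

static unsigned packAuxInfo(unsigned Index, bool FoldedLoad, bool FoldedStore) {
  return Index | (unsigned(FoldedLoad) << 4) | (unsigned(FoldedStore) << 5);
}

int main() {
  unsigned TwoAddr = packAuxInfo(0, true, true);
  assert((TwoAddr & 0xf) == 0); // operand index
  assert(TwoAddr & (1 << 4));   // folded load
  assert(TwoAddr & (1 << 5));   // folded store
  assert(TwoAddr == (0 | (1 << 4) | (1 << 5)));
  return 0;
}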
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
   } else if (OpNum == 0) { // If operand 0
     switch (Opc) {
+    case X86::MOV8r0:
     case X86::MOV16r0:
     case X86::MOV32r0:
-    case X86::MOV64r0:
-    case X86::MOV8r0:
       return true;
     default: break;
     }
@@ -2211,7 +2370,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)Opc);
     if (I != OpcodeTablePtr->end())
       return true;
@@ -2221,11 +2380,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-                                SmallVectorImpl<MachineInstr*> &NewMIs) const {
+                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
   DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
     MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
   if (I == MemOp2RegOpTable.end())
     return false;
+  DebugLoc dl = MI->getDebugLoc();
   unsigned Opc = I->second.first;
   unsigned Index = I->second.second & 0xf;
   bool FoldedLoad = I->second.second & (1 << 4);
@@ -2239,15 +2399,14 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   const TargetInstrDesc &TID = get(Opc);
   const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
-    ? getPointerRegClass() : RI.getRegClass(TOI.RegClass);
-  SmallVector<MachineOperand,4> AddrOps;
+  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
+  SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
   SmallVector<MachineOperand,2> BeforeOps;
   SmallVector<MachineOperand,2> AfterOps;
   SmallVector<MachineOperand,4> ImpOps;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &Op = MI->getOperand(i);
-    if (i >= Index && i < Index+4)
+    if (i >= Index && i < Index + X86AddrNumOperands)
       AddrOps.push_back(Op);
     else if (Op.isReg() && Op.isImplicit())
       ImpOps.push_back(Op);
@@ -2262,7 +2421,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
     loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
     if (UnfoldStore) {
       // Address operands cannot be marked isKill.
-      for (unsigned i = 1; i != 5; ++i) {
+      for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
         MachineOperand &MO = NewMIs[0]->getOperand(i);
         if (MO.isReg())
           MO.setIsKill(false);
@@ -2271,20 +2430,25 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
 
   // Emit the data processing instruction.
-  MachineInstr *DataMI = MF.CreateMachineInstr(TID, true);
+  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
   MachineInstrBuilder MIB(DataMI);
   
   if (FoldedStore)
-    MIB.addReg(Reg, true);
+    MIB.addReg(Reg, RegState::Define);
   for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
-    MIB = X86InstrAddOperand(MIB, BeforeOps[i]);
+    MIB.addOperand(BeforeOps[i]);
   if (FoldedLoad)
     MIB.addReg(Reg);
   for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
-    MIB = X86InstrAddOperand(MIB, AfterOps[i]);
+    MIB.addOperand(AfterOps[i]);
   for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
     MachineOperand &MO = ImpOps[i];
-    MIB.addReg(MO.getReg(), MO.isDef(), true, MO.isKill(), MO.isDead());
+    MIB.addReg(MO.getReg(),
+               getDefRegState(MO.isDef()) |
+               RegState::Implicit |
+               getKillRegState(MO.isKill()) |
+               getDeadRegState(MO.isDead()) |
+               getUndefRegState(MO.isUndef()));
   }
   // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
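The conversion below is sound because r - 0 and r & r both reproduce r, so CMP against zero and TEST of a register with itself set ZF, SF, CF and OF identically. A stand-alone check over the sign and zero flags:

#include <cassert>

static void flagsOfCmpZero(int R, bool &ZF, bool &SF) {
  int Res = R - 0;
  ZF = Res == 0; SF = Res < 0;
}

static void flagsOfTestSelf(int R, bool &ZF, bool &SF) {
  int Res = R & R;
  ZF = Res == 0; SF = Res < 0;
}

int main() {
  int Vals[] = { -5, 0, 7 };
  for (int i = 0; i != 3; ++i) {
    bool Z1, S1, Z2, S2;
    flagsOfCmpZero(Vals[i], Z1, S1);
    flagsOfTestSelf(Vals[i], Z2, S2);
    assert(Z1 == Z2 && S1 == S2);
  }
  return 0;
}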
unsigned NewOpc = 0; @@ -2313,9 +2477,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass() - ? getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); } @@ -2324,7 +2486,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, bool X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, - SmallVectorImpl &NewNodes) const { + SmallVectorImpl &NewNodes) const { if (!N->isMachineOpcode()) return false; @@ -2337,20 +2499,20 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() - ? getPointerRegClass() : RI.getRegClass(TOI.RegClass); + const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); + unsigned NumDefs = TID.NumDefs; std::vector AddrOps; std::vector BeforeOps; std::vector AfterOps; + DebugLoc dl = N->getDebugLoc(); unsigned NumOps = N->getNumOperands(); for (unsigned i = 0; i != NumOps-1; ++i) { SDValue Op = N->getOperand(i); - if (i >= Index && i < Index+4) + if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) AddrOps.push_back(Op); - else if (i < Index) + else if (i < Index-NumDefs) BeforeOps.push_back(Op); - else if (i > Index) + else if (i > Index-NumDefs) AfterOps.push_back(Op); } SDValue Chain = N->getOperand(NumOps-1); @@ -2360,33 +2522,31 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, SDNode *Load = 0; const MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { - MVT VT = *RC->vt_begin(); + EVT VT = *RC->vt_begin(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - Load = DAG.getTargetNode(getLoadRegOpcode(RC, isAligned), - VT, MVT::Other, - &AddrOps[0], AddrOps.size()); + Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); } // Emit the data processing instruction. - std::vector VTs; + std::vector VTs; const TargetRegisterClass *DstRC = 0; if (TID.getNumDefs() > 0) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - DstRC = DstTOI.isLookupPtrRegClass() - ? getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + DstRC = TID.OpInfo[0].getRegClass(&RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) VTs.push_back(VT); } if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getTargetNode(Opc, VTs, &BeforeOps[0], BeforeOps.size()); + SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0], + BeforeOps.size()); NewNodes.push_back(NewNode); // Emit the store instruction. 
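When the store is re-emitted, the opcode again depends on whether the spill slot is known 16-byte aligned; the aligned vector move is only legal on an aligned slot. A stand-alone sketch of that choice (the MOVAPSmr/MOVUPSmr pairing is the assumed VR128 mapping in getStoreRegOpcode; strings stand in for the opcode enums):

#include <cassert>
#include <cstring>

static const char *vectorSpillOpcode(unsigned StackAlign, bool NeedsRealignment) {
  // Mirrors the isAligned computation used above.
  bool IsAligned = StackAlign >= 16 || NeedsRealignment;
  return IsAligned ? "MOVAPSmr" : "MOVUPSmr";
}

int main() {
  assert(!strcmp(vectorSpillOpcode(16, false), "MOVAPSmr"));
  assert(!strcmp(vectorSpillOpcode(8,  false), "MOVUPSmr"));
  assert(!strcmp(vectorSpillOpcode(8,  true),  "MOVAPSmr"));
  return 0;
}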
@@ -2396,8 +2556,10 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.push_back(Chain); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(DstRC, isAligned), - MVT::Other, &AddrOps[0], AddrOps.size()); + SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, + isAligned, TM), + dl, MVT::Other, + &AddrOps[0], AddrOps.size()); NewNodes.push_back(Store); } @@ -2451,19 +2613,11 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { } bool X86InstrInfo:: -IgnoreRegisterClassBarriers(const TargetRegisterClass *RC) const { - // FIXME: Ignore bariers of x87 stack registers for now. We can't +isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + // FIXME: Return false for x87 stack register classes for now. We can't // allow any loads of these registers before FpGet_ST0_80. - return RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || - RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass; -} - -const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const { - const X86Subtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->is64Bit()) - return &X86::GR64RegClass; - else - return &X86::GR32RegClass; + return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || + RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { @@ -2472,7 +2626,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { case X86II::Imm16: return 2; case X86II::Imm32: return 4; case X86II::Imm64: return 8; - default: assert(0 && "Immediate size not set!"); + default: llvm_unreachable("Immediate size not set!"); return 0; } } @@ -2564,7 +2718,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = isTwoAddr ? 5 : 4; + unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); i = isTwoAddr ? 1 : 0; if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) REX |= 1 << 2; @@ -2657,7 +2811,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { } else if (RelocOp->isJTI()) { FinalSize += sizeJumpTableAddress(false); } else { - assert(0 && "Unknown value to relocate!"); + llvm_unreachable("Unknown value to relocate!"); } return FinalSize; } @@ -2694,8 +2848,9 @@ static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, unsigned BaseReg = Base.getReg(); // Is a SIB byte needed? - if (IndexReg.getReg() == 0 && - (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { + if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + IndexReg.getReg() == 0 && + (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { if (BaseReg == 0) { // Just a displacement? // Emit special case [disp32] encoding ++FinalSize; @@ -2747,13 +2902,13 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, // Emit the lock opcode prefix as needed. if (Desc->TSFlags & X86II::LOCK) ++FinalSize; - // Emit segment overrid opcode prefix as needed. + // Emit segment override opcode prefix as needed. switch (Desc->TSFlags & X86II::SegOvrMask) { case X86II::FS: case X86II::GS: ++FinalSize; break; - default: assert(0 && "Invalid segment!"); + default: llvm_unreachable("Invalid segment!"); case 0: break; // No segment override! 
} @@ -2773,6 +2928,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::TA: // 0F 3A Need0FPrefix = true; break; + case X86II::TF: // F2 0F 38 + ++FinalSize; + Need0FPrefix = true; + break; case X86II::REP: break; // already handled. case X86II::XS: // F3 0F ++FinalSize; @@ -2786,7 +2945,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: ++FinalSize; break; // Two-byte opcode prefix - default: assert(0 && "Invalid prefix!"); + default: llvm_unreachable("Invalid prefix!"); case 0: break; // No prefix! } @@ -2805,7 +2964,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::T8: // 0F 38 ++FinalSize; break; - case X86II::TA: // 0F 3A + case X86II::TA: // 0F 3A + ++FinalSize; + break; + case X86II::TF: // F2 0F 38 ++FinalSize; break; } @@ -2815,9 +2977,12 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, unsigned CurOp = 0; if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) CurOp++; + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; switch (Desc->TSFlags & X86II::FormMask) { - default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); case X86II::Pseudo: // Remember the current PC offset, this is the PIC relocation // base address. @@ -2826,16 +2991,15 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case TargetInstrInfo::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); - const char *AsmStr = MI.getOperand(0).getSymbolName(); - const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo(); - FinalSize += AI->getInlineAsmLength(AsmStr); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), + *MF->getTarget().getMCAsmInfo()); break; } case TargetInstrInfo::DBG_LABEL: case TargetInstrInfo::EH_LABEL: break; case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: case X86::DWARF_LOC: case X86::FP_REG_KILL: break; @@ -2845,11 +3009,6 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); break; } - case X86::TLS_tp: - case X86::TLS_gs_ri: - FinalSize += 2; - FinalSize += sizeGlobalAddress(false); - break; } CurOp = NumOps; break; @@ -2867,7 +3026,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } else if (MO.isImm()) { FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); } else { - assert(0 && "Unknown RawFrm operand!"); + llvm_unreachable("Unknown RawFrm operand!"); } } break; @@ -2910,7 +3069,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::MRMDestMem: { ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += 5; + CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); @@ -2929,10 +3088,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case X86II::MRMSrcMem: { + int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, 
Is64BitMode); - CurOp += 5; + CurOp += AddrOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); @@ -2945,8 +3110,18 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: ++FinalSize; - ++CurOp; - FinalSize += sizeRegModRMByte(); + if (Desc->getOpcode() == X86::LFENCE || + Desc->getOpcode() == X86::MFENCE) { + // Special handling of lfence and mfence; + FinalSize += sizeRegModRMByte(); + } else if (Desc->getOpcode() == X86::MONITOR || + Desc->getOpcode() == X86::MWAIT) { + // Special handling of monitor and mwait. + FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. + } else { + ++CurOp; + FinalSize += sizeRegModRMByte(); + } if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); @@ -2976,7 +3151,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += 4; + CurOp += X86AddrNumOperands; if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); @@ -3009,10 +3184,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } if (!Desc->isVariadic() && CurOp != NumOps) { - cerr << "Cannot determine size: "; - MI.dump(); - cerr << '\n'; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot determine size: " << MI; + llvm_report_error(Msg.str()); } @@ -3022,12 +3197,11 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const TargetInstrDesc &Desc = MI->getDesc(); - bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); - if (Desc.getOpcode() == X86::MOVPC32r) { + if (Desc.getOpcode() == X86::MOVPC32r) Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); - } return Size; } @@ -3047,22 +3221,24 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); const TargetInstrInfo *TII = TM.getInstrInfo(); // Operand of MovePCtoStack is completely ignored by asm printer. It's // only used in JIT code emission as displacement to pc. - BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0); + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); // If we're using vanilla 'GOT' PIC style, we should use relative addressing - // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external - if (TM.getRelocationModel() == Reloc::PIC_ && - TM.getSubtarget().isPICStyleGOT()) { - GlobalBaseReg = - RegInfo.createVirtualRegister(X86::GR32RegisterClass); - BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 
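The arithmetic being set up: MOVPC32r leaves the runtime address of the pic label in a register, and the ADD32ri applies the link-time constant _GLOBAL_OFFSET_TABLE_ - piclabel, so the register holds the GOT address wherever the image is loaded. A stand-alone model with hypothetical addresses:

#include <cassert>

static unsigned long gotBase(unsigned long PicLabelAddr, long GotMinusPicLabel) {
  return PicLabelAddr + GotMinusPicLabel;
}

int main() {
  const long Delta = 0x2000;            // _GLOBAL_OFFSET_TABLE_ - piclabel
  unsigned long LoadBias = 0x400000;    // hypothetical load address
  unsigned long PicLabel = LoadBias + 0x10;
  unsigned long Got = PicLabel + Delta; // where the GOT actually landed
  assert(gotBase(PicLabel, Delta) == Got);
  return 0;
}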
+ if (TM.getSubtarget().isPICStyleGOT()) { + GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", + X86II::MO_GOT_ABSOLUTE_ADDRESS); } else { GlobalBaseReg = PC; }