X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=bbe5bd363ee978b919c48a36db7fae6bf47cbaac;hb=46e803b3e649c20ee3141341b2e58d5d64489e33;hp=adda5aa8e6937c642bd023d5feb346e3660799b5;hpb=828bb6c97881fe4f2f27bdc9096ca3f795941253;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index adda5aa8e69..bbe5bd363ee 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetAsmInfo.h" using namespace llvm; @@ -37,6 +38,10 @@ namespace { cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden); + cl::opt + ReMatPICStubLoad("remat-pic-stub-load", + cl::desc("Re-materialize load from stub in PIC mode"), + cl::init(false), cl::Hidden); } X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) @@ -218,11 +223,15 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::CALL64r, X86::CALL64m, 1 }, { X86::CMP16ri, X86::CMP16mi, 1 }, { X86::CMP16ri8, X86::CMP16mi8, 1 }, + { X86::CMP16rr, X86::CMP16mr, 1 }, { X86::CMP32ri, X86::CMP32mi, 1 }, { X86::CMP32ri8, X86::CMP32mi8, 1 }, + { X86::CMP32rr, X86::CMP32mr, 1 }, { X86::CMP64ri32, X86::CMP64mi32, 1 }, { X86::CMP64ri8, X86::CMP64mi8, 1 }, + { X86::CMP64rr, X86::CMP64mr, 1 }, { X86::CMP8ri, X86::CMP8mi, 1 }, + { X86::CMP8rr, X86::CMP8mr, 1 }, { X86::DIV16r, X86::DIV16m, 1 }, { X86::DIV32r, X86::DIV32m, 1 }, { X86::DIV64r, X86::DIV64m, 1 }, @@ -388,7 +397,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PSHUFDri, X86::PSHUFDmi }, { X86::PSHUFHWri, X86::PSHUFHWmi }, { X86::PSHUFLWri, X86::PSHUFLWmi }, - { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 }, { X86::RCPPSr, X86::RCPPSm }, { X86::RCPPSr_Int, X86::RCPPSm_Int }, { X86::RSQRTPSr, X86::RSQRTPSm }, @@ -495,6 +503,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::DIVPSrr, X86::DIVPSrm }, { X86::DIVSDrr, X86::DIVSDrm }, { X86::DIVSSrr, X86::DIVSSrm }, + { X86::FsANDNPDrr, X86::FsANDNPDrm }, + { X86::FsANDNPSrr, X86::FsANDNPSrm }, + { X86::FsANDPDrr, X86::FsANDPDrm }, + { X86::FsANDPSrr, X86::FsANDPSrm }, + { X86::FsORPDrr, X86::FsORPDrm }, + { X86::FsORPSrr, X86::FsORPSrm }, + { X86::FsXORPDrr, X86::FsXORPDrm }, + { X86::FsXORPSrr, X86::FsXORPSrm }, { X86::HADDPDrr, X86::HADDPDrm }, { X86::HADDPSrr, X86::HADDPSrm }, { X86::HSUBPDrr, X86::HSUBPDrm }, @@ -553,8 +569,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMAXUBrr, X86::PMAXUBrm }, { X86::PMINSWrr, X86::PMINSWrm }, { X86::PMINUBrr, X86::PMINUBrm }, + { X86::PMULDQrr, X86::PMULDQrm }, + { X86::PMULDQrr_int, X86::PMULDQrm_int }, { X86::PMULHUWrr, X86::PMULHUWrm }, { X86::PMULHWrr, X86::PMULHWrm }, + { X86::PMULLDrr, X86::PMULLDrm }, + { X86::PMULLDrr_int, X86::PMULLDrm_int }, { X86::PMULLWrr, X86::PMULLWrm }, { X86::PMULUDQrr, X86::PMULUDQrm }, { X86::PORrr, X86::PORrm }, @@ -624,26 +644,41 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, unsigned& sourceReg, unsigned& destReg) const { - unsigned oc = MI.getOpcode(); - if (oc == X86::MOV8rr || oc == X86::MOV16rr || - oc == X86::MOV32rr || oc == X86::MOV64rr || - oc == X86::MOV16to16_ || oc == X86::MOV32to32_ || - oc == X86::MOV_Fp3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr || - oc == X86::MOV_Fp3264 || oc == X86::MOV_Fp6432 || oc == X86::MOV_Fp6464 || - oc == X86::FsMOVAPSrr || oc 
== X86::FsMOVAPDrr || - oc == X86::MOVAPSrr || oc == X86::MOVAPDrr || - oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr || - oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr || - oc == X86::MMX_MOVD64rr || oc == X86::MMX_MOVQ64rr) { - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isRegister() && - MI.getOperand(1).isRegister() && - "invalid register-register move instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; + switch (MI.getOpcode()) { + default: + return false; + case X86::MOV8rr: + case X86::MOV16rr: + case X86::MOV32rr: + case X86::MOV64rr: + case X86::MOV16to16_: + case X86::MOV32to32_: + case X86::MOVSSrr: + case X86::MOVSDrr: + + // FP Stack register class copies + case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: + case X86::MOV_Fp3264: case X86::MOV_Fp3280: + case X86::MOV_Fp6432: case X86::MOV_Fp8032: + + case X86::FsMOVAPSrr: + case X86::FsMOVAPDrr: + case X86::MOVAPSrr: + case X86::MOVAPDrr: + case X86::MOVSS2PSrr: + case X86::MOVSD2PDrr: + case X86::MOVPS2SSrr: + case X86::MOVPD2SDrr: + case X86::MMX_MOVD64rr: + case X86::MMX_MOVQ64rr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isRegister() && + MI.getOperand(1).isRegister() && + "invalid register-register move instruction"); + sourceReg = MI.getOperand(1).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; } - return false; } unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI, @@ -708,49 +743,136 @@ unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI, } -bool X86InstrInfo::isReallyTriviallyReMaterializable(MachineInstr *MI) const { +/// regIsPICBase - Return true if register is PIC base (i.e.g defined by +/// X86::MOVPC32r. +static bool regIsPICBase(unsigned BaseReg, MachineRegisterInfo &MRI) { + bool isPICBase = false; + for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), + E = MRI.def_end(); I != E; ++I) { + MachineInstr *DefMI = I.getOperand().getParent(); + if (DefMI->getOpcode() != X86::MOVPC32r) + return false; + assert(!isPICBase && "More than one PIC base?"); + isPICBase = true; + } + return isPICBase; +} + +/// isGVStub - Return true if the GV requires an extra load to get the +/// real address. +static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) { + return TM.getSubtarget().GVRequiresExtraLoad(GV, TM, false); +} + +bool +X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; - case X86::MOV8rm: - case X86::MOV16rm: - case X86::MOV16_rm: - case X86::MOV32rm: - case X86::MOV32_rm: - case X86::MOV64rm: - case X86::LD_Fp64m: - case X86::MOVSSrm: - case X86::MOVSDrm: - case X86::MOVAPSrm: - case X86::MOVAPDrm: - case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: - // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(4).isCPI() && - MI->getOperand(1).getReg() == 0 && - MI->getOperand(2).getImm() == 1 && - MI->getOperand(3).getReg() == 0) - return true; - - // If this is a load from a fixed argument slot, we know the value is - // invariant across the whole function, because we don't redefine argument - // values. -#if 0 - // FIXME: This is disabled due to a remat bug. 
rdar://5671644 - if (MI->getOperand(1).isFI()) { - const MachineFrameInfo &MFI=*MI->getParent()->getParent()->getFrameInfo(); - int Idx = MI->getOperand(1).getIndex(); - return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV16_rm: + case X86::MOV32rm: + case X86::MOV32_rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVAPDrm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: { + // Loads from constant pools are trivially rematerializable. + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + (MI->getOperand(4).isCPI() || + (MI->getOperand(4).isGlobal() && + isGVStub(MI->getOperand(4).getGlobal(), TM)))) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of PIC load. + if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + return false; + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + bool isPICBase = false; + for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), + E = MRI.def_end(); I != E; ++I) { + MachineInstr *DefMI = I.getOperand().getParent(); + if (DefMI->getOpcode() != X86::MOVPC32r) + return false; + assert(!isPICBase && "More than one PIC base?"); + isPICBase = true; + } + return isPICBase; + } + return false; } -#endif - - return false; + + case X86::LEA32r: + case X86::LEA64r: { + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + !MI->getOperand(4).isReg()) { + // lea fi#, lea GV, etc. are all rematerializable. + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of lea PICBase + x. + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + return regIsPICBase(BaseReg, MRI); + } + return false; + } } + // All other instructions marked M_REMATERIALIZABLE are always trivially // rematerializable. return true; } +void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { + unsigned SubIdx = Orig->getOperand(0).isReg() + ? Orig->getOperand(0).getSubReg() : 0; + bool ChangeSubIdx = SubIdx != 0; + if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { + DestReg = RI.getSubReg(DestReg, SubIdx); + SubIdx = 0; + } + + // MOV32r0 etc. are implemented with xor which clobbers condition code. + // Re-materialize them as movri instructions to avoid side effects. + switch (Orig->getOpcode()) { + case X86::MOV8r0: + BuildMI(MBB, I, get(X86::MOV8ri), DestReg).addImm(0); + break; + case X86::MOV16r0: + BuildMI(MBB, I, get(X86::MOV16ri), DestReg).addImm(0); + break; + case X86::MOV32r0: + BuildMI(MBB, I, get(X86::MOV32ri), DestReg).addImm(0); + break; + case X86::MOV64r0: + BuildMI(MBB, I, get(X86::MOV64ri32), DestReg).addImm(0); + break; + default: { + MachineInstr *MI = Orig->clone(); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); + break; + } + } + + if (ChangeSubIdx) { + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); + } +} + /// isInvariantLoad - Return true if the specified instruction (which is marked /// mayLoad) is loading from a location whose value is invariant across the /// function. 
For example, loading a value from the constant pool or from @@ -761,7 +883,7 @@ bool X86InstrInfo::isInvariantLoad(MachineInstr *MI) const { // This code cares about loads from three cases: constant pool entries, // invariant argument slots, and global stubs. In order to handle these cases // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV - // operand and base are analysis on it. This is safe because the address of + // operand and base our analysis on it. This is safe because the address of // none of these three cases is ever used as anything other than a load base // and X86 doesn't have any instructions that load from multiple places. @@ -770,13 +892,9 @@ bool X86InstrInfo::isInvariantLoad(MachineInstr *MI) const { // Loads from constant pools are trivially invariant. if (MO.isCPI()) return true; - - if (MO.isGlobal()) { - if (TM.getSubtarget().GVRequiresExtraLoad(MO.getGlobal(), - TM, false)) - return true; - return false; - } + + if (MO.isGlobal()) + return isGVStub(MO.getGlobal(), TM); // If this is a load from an invariant stack slot, the load is a constant. if (MO.isFI()) { @@ -888,17 +1006,23 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - MachineInstr *Ins = - BuildMI(get(X86::INSERT_SUBREG), leaInReg).addReg(Src).addImm(2); - Ins->copyKillDeadInfo(MI); + // Build and insert into an implicit UNDEF value. This is OK because + // well be shifting and then extracting the lower 16-bits. + MachineInstr *Undef = BuildMI(get(X86::IMPLICIT_DEF), leaInReg); + + MachineInstr *Ins = + BuildMI(get(X86::INSERT_SUBREG),leaInReg) + .addReg(leaInReg).addReg(Src).addImm(X86::SUBREG_16BIT); NewMI = BuildMI(get(Opc), leaOutReg) .addReg(0).addImm(1 << ShAmt).addReg(leaInReg).addImm(0); MachineInstr *Ext = - BuildMI(get(X86::EXTRACT_SUBREG), Dest).addReg(leaOutReg).addImm(2); + BuildMI(get(X86::EXTRACT_SUBREG), Dest) + .addReg(leaOutReg).addImm(X86::SUBREG_16BIT); Ext->copyKillDeadInfo(MI); + MFI->insert(MBBI, Undef); MFI->insert(MBBI, Ins); // Insert the insert_subreg LV.instructionChanged(MI, NewMI); // Update live variables LV.addVirtualRegisterKilled(leaInReg, NewMI); @@ -1011,6 +1135,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } } + if (!NewMI) return 0; + NewMI->copyKillDeadInfo(MI); LV.instructionChanged(MI, NewMI); // Update live variables MFI->insert(MBBI, NewMI); // Insert the new inst @@ -1020,7 +1146,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, /// commuteInstruction - We have a few instructions that must be hacked on to /// commute them. /// -MachineInstr *X86InstrInfo::commuteInstruction(MachineInstr *MI) const { +MachineInstr * +X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { switch (MI->getOpcode()) { case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) @@ -1045,6 +1172,15 @@ MachineInstr *X86InstrInfo::commuteInstruction(MachineInstr *MI) const { unsigned C = MI->getOperand(2).getReg(); bool BisKill = MI->getOperand(1).isKill(); bool CisKill = MI->getOperand(2).isKill(); + // If machine instrs are no longer in two-address forms, update + // destination register as well. + if (A == B) { + // Must be two address instruction! 
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + "Expecting a two-address instruction!"); + A = C; + CisKill = false; + } return BuildMI(get(Opc), A).addReg(C, false, false, CisKill) .addReg(B, false, false, BisKill).addImm(Size-Amt); } @@ -1141,7 +1277,7 @@ MachineInstr *X86InstrInfo::commuteInstruction(MachineInstr *MI) const { // Fallthrough intended. } default: - return TargetInstrInfoImpl::commuteInstruction(MI); + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } } @@ -1373,71 +1509,109 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } void X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - if (DestRC != SrcRC) { - // Moving EFLAGS to / from another register requires a push and a pop. - if (SrcRC == &X86::CCRRegClass) { - assert(SrcReg == X86::EFLAGS); - if (DestRC == &X86::GR64RegClass) { - BuildMI(MBB, MI, get(X86::PUSHFQ)); - BuildMI(MBB, MI, get(X86::POP64r), DestReg); - return; - } else if (DestRC == &X86::GR32RegClass) { - BuildMI(MBB, MI, get(X86::PUSHFD)); - BuildMI(MBB, MI, get(X86::POP32r), DestReg); - return; - } - } else if (DestRC == &X86::CCRRegClass) { - assert(DestReg == X86::EFLAGS); - if (SrcRC == &X86::GR64RegClass) { - BuildMI(MBB, MI, get(X86::PUSH64r)).addReg(SrcReg); - BuildMI(MBB, MI, get(X86::POPFQ)); - return; - } else if (SrcRC == &X86::GR32RegClass) { - BuildMI(MBB, MI, get(X86::PUSH32r)).addReg(SrcReg); - BuildMI(MBB, MI, get(X86::POPFD)); - return; - } + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + if (DestRC == SrcRC) { + unsigned Opc; + if (DestRC == &X86::GR64RegClass) { + Opc = X86::MOV64rr; + } else if (DestRC == &X86::GR32RegClass) { + Opc = X86::MOV32rr; + } else if (DestRC == &X86::GR16RegClass) { + Opc = X86::MOV16rr; + } else if (DestRC == &X86::GR8RegClass) { + Opc = X86::MOV8rr; + } else if (DestRC == &X86::GR32_RegClass) { + Opc = X86::MOV32_rr; + } else if (DestRC == &X86::GR16_RegClass) { + Opc = X86::MOV16_rr; + } else if (DestRC == &X86::RFP32RegClass) { + Opc = X86::MOV_Fp3232; + } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) { + Opc = X86::MOV_Fp6464; + } else if (DestRC == &X86::RFP80RegClass) { + Opc = X86::MOV_Fp8080; + } else if (DestRC == &X86::FR32RegClass) { + Opc = X86::FsMOVAPSrr; + } else if (DestRC == &X86::FR64RegClass) { + Opc = X86::FsMOVAPDrr; + } else if (DestRC == &X86::VR128RegClass) { + Opc = X86::MOVAPSrr; + } else if (DestRC == &X86::VR64RegClass) { + Opc = X86::MMX_MOVQ64rr; + } else { + assert(0 && "Unknown regclass"); + abort(); } - cerr << "Not yet supported!"; - abort(); + BuildMI(MBB, MI, get(Opc), DestReg).addReg(SrcReg); + return; + } + + // Moving EFLAGS to / from another register requires a push and a pop. 
+ if (SrcRC == &X86::CCRRegClass) { + assert(SrcReg == X86::EFLAGS); + if (DestRC == &X86::GR64RegClass) { + BuildMI(MBB, MI, get(X86::PUSHFQ)); + BuildMI(MBB, MI, get(X86::POP64r), DestReg); + return; + } else if (DestRC == &X86::GR32RegClass) { + BuildMI(MBB, MI, get(X86::PUSHFD)); + BuildMI(MBB, MI, get(X86::POP32r), DestReg); + return; + } + } else if (DestRC == &X86::CCRRegClass) { + assert(DestReg == X86::EFLAGS); + if (SrcRC == &X86::GR64RegClass) { + BuildMI(MBB, MI, get(X86::PUSH64r)).addReg(SrcReg); + BuildMI(MBB, MI, get(X86::POPFQ)); + return; + } else if (SrcRC == &X86::GR32RegClass) { + BuildMI(MBB, MI, get(X86::PUSH32r)).addReg(SrcReg); + BuildMI(MBB, MI, get(X86::POPFD)); + return; + } + } + + // Moving from ST(0) turns into FpGET_ST0_32 etc. + if (SrcRC == &X86::RSTRegClass) { + // Copying from ST(0)/ST(1). + assert((SrcReg == X86::ST0 || SrcReg == X86::ST1) && + "Can only copy from ST(0)/ST(1) right now"); + bool isST0 = SrcReg == X86::ST0; + unsigned Opc; + if (DestRC == &X86::RFP32RegClass) + Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; + else if (DestRC == &X86::RFP64RegClass) + Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; + else { + assert(DestRC == &X86::RFP80RegClass); + Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; + } + BuildMI(MBB, MI, get(Opc), DestReg); + return; } - unsigned Opc; - if (DestRC == &X86::GR64RegClass) { - Opc = X86::MOV64rr; - } else if (DestRC == &X86::GR32RegClass) { - Opc = X86::MOV32rr; - } else if (DestRC == &X86::GR16RegClass) { - Opc = X86::MOV16rr; - } else if (DestRC == &X86::GR8RegClass) { - Opc = X86::MOV8rr; - } else if (DestRC == &X86::GR32_RegClass) { - Opc = X86::MOV32_rr; - } else if (DestRC == &X86::GR16_RegClass) { - Opc = X86::MOV16_rr; - } else if (DestRC == &X86::RFP32RegClass) { - Opc = X86::MOV_Fp3232; - } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) { - Opc = X86::MOV_Fp6464; - } else if (DestRC == &X86::RFP80RegClass) { - Opc = X86::MOV_Fp8080; - } else if (DestRC == &X86::FR32RegClass) { - Opc = X86::FsMOVAPSrr; - } else if (DestRC == &X86::FR64RegClass) { - Opc = X86::FsMOVAPDrr; - } else if (DestRC == &X86::VR128RegClass) { - Opc = X86::MOVAPSrr; - } else if (DestRC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rr; - } else { - assert(0 && "Unknown regclass"); - abort(); + // Moving to ST(0) turns into FpSET_ST0_32 etc. + if (DestRC == &X86::RSTRegClass) { + // Copying to ST(0). 
FIXME: handle ST(1) also + assert(DestReg == X86::ST0 && "Can only copy to TOS right now"); + unsigned Opc; + if (SrcRC == &X86::RFP32RegClass) + Opc = X86::FpSET_ST0_32; + else if (SrcRC == &X86::RFP64RegClass) + Opc = X86::FpSET_ST0_64; + else { + assert(SrcRC == &X86::RFP80RegClass); + Opc = X86::FpSET_ST0_80; + } + BuildMI(MBB, MI, get(Opc)).addReg(SrcReg); + return; } - BuildMI(MBB, MI, get(Opc), DestReg).addReg(SrcReg); + + assert(0 && "Not yet supported!"); + abort(); } static unsigned getStoreRegOpcode(const TargetRegisterClass *RC, @@ -1660,7 +1834,7 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI, unsigned i, - SmallVector &MOs) const { + SmallVector &MOs) const { const DenseMap *OpcodeTablePtr = NULL; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); @@ -1720,12 +1894,33 @@ X86InstrInfo::foldMemoryOperand(MachineInstr *MI, unsigned i, } -MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI, +MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, + MachineInstr *MI, SmallVectorImpl &Ops, int FrameIndex) const { // Check switch flag if (NoFusing) return NULL; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Alignment = MFI->getObjectAlignment(FrameIndex); + // FIXME: Move alignment requirement into tables? + if (Alignment < 16) { + switch (MI->getOpcode()) { + default: break; + // Not always safe to fold movsd into these instructions since their load + // folding variants expects the address to be 16 byte aligned. + case X86::FsANDNPDrr: + case X86::FsANDNPSrr: + case X86::FsANDPDrr: + case X86::FsANDPSrr: + case X86::FsORPDrr: + case X86::FsORPSrr: + case X86::FsXORPDrr: + case X86::FsXORPSrr: + return NULL; + } + } + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { @@ -1746,12 +1941,39 @@ MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI, return foldMemoryOperand(MI, Ops[0], MOs); } -MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI, +MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF, + MachineInstr *MI, SmallVectorImpl &Ops, MachineInstr *LoadMI) const { // Check switch flag if (NoFusing) return NULL; + unsigned Alignment = 0; + for (unsigned i = 0, e = LoadMI->getNumMemOperands(); i != e; ++i) { + const MachineMemOperand &MRO = LoadMI->getMemOperand(i); + unsigned Align = MRO.getAlignment(); + if (Align > Alignment) + Alignment = Align; + } + + // FIXME: Move alignment requirement into tables? + if (Alignment < 16) { + switch (MI->getOpcode()) { + default: break; + // Not always safe to fold movsd into these instructions since their load + // folding variants expects the address to be 16 byte aligned. + case X86::FsANDNPDrr: + case X86::FsANDNPSrr: + case X86::FsANDPDrr: + case X86::FsANDPSrr: + case X86::FsORPDrr: + case X86::FsORPSrr: + case X86::FsXORPDrr: + case X86::FsXORPSrr: + return NULL; + } + } + if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { @@ -1972,14 +2194,14 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = 0; if (FoldedLoad) { - MVT::ValueType VT = *RC->vt_begin(); + MVT VT = *RC->vt_begin(); Load = DAG.getTargetNode(getLoadRegOpcode(RC, RI.getStackAlignment()), VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); } // Emit the data processing instruction. 
- std::vector VTs; + std::vector VTs; const TargetRegisterClass *DstRC = 0; if (TID.getNumDefs() > 0) { const TargetOperandInfo &DstTOI = TID.OpInfo[0]; @@ -1988,7 +2210,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT::ValueType VT = N->getValueType(i); + MVT VT = N->getValueType(i); if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) VTs.push_back(VT); } @@ -2061,3 +2283,552 @@ const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const { else return &X86::GR32RegClass; } + +unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { + switch (Desc->TSFlags & X86II::ImmMask) { + case X86II::Imm8: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: return 4; + case X86II::Imm64: return 8; + default: assert(0 && "Immediate size not set!"); + return 0; + } +} + +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? +/// e.g. r8, xmm8, etc. +bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isRegister()) return false; + switch (MO.getReg()) { + default: break; + case X86::R8: case X86::R9: case X86::R10: case X86::R11: + case X86::R12: case X86::R13: case X86::R14: case X86::R15: + case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: + case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: + case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: + case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: + case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: + case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + return true; + } + return false; +} + + +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const TargetInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + unsigned Reg = MO.getReg(); + if (isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= (1 << 0) | (1 << 2); + break; + case X86II::MRMSrcReg: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 
2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = isTwoAddr ? 5 : 4; + i = isTwoAddr ? 1 : 0; + if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isRegister()) { + if (isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + +/// sizePCRelativeBlockAddress - This method returns the size of a PC +/// relative block address instruction +/// +static unsigned sizePCRelativeBlockAddress() { + return 4; +} + +/// sizeGlobalAddress - Give the size of the emission of this global address +/// +static unsigned sizeGlobalAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeConstPoolAddress - Give the size of the emission of this constant +/// pool address +/// +static unsigned sizeConstPoolAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeExternalSymbolAddress - Give the size of the emission of this external +/// symbol +/// +static unsigned sizeExternalSymbolAddress(bool dword) { + return dword ? 8 : 4; +} + +/// sizeJumpTableAddress - Give the size of the emission of this jump +/// table address +/// +static unsigned sizeJumpTableAddress(bool dword) { + return dword ? 8 : 4; +} + +static unsigned sizeConstant(unsigned Size) { + return Size; +} + +static unsigned sizeRegModRMByte(){ + return 1; +} + +static unsigned sizeSIBByte(){ + return 1; +} + +static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { + unsigned FinalSize = 0; + // If this is a simple integer displacement that doesn't require a relocation. + if (!RelocOp) { + FinalSize += sizeConstant(4); + return FinalSize; + } + + // Otherwise, this is something that requires a relocation. + if (RelocOp->isGlobalAddress()) { + FinalSize += sizeGlobalAddress(false); + } else if (RelocOp->isConstantPoolIndex()) { + FinalSize += sizeConstPoolAddress(false); + } else if (RelocOp->isJumpTableIndex()) { + FinalSize += sizeJumpTableAddress(false); + } else { + assert(0 && "Unknown value to relocate!"); + } + return FinalSize; +} + +static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, + bool IsPIC, bool Is64BitMode) { + const MachineOperand &Op3 = MI.getOperand(Op+3); + int DispVal = 0; + const MachineOperand *DispForReloc = 0; + unsigned FinalSize = 0; + + // Figure out what sort of displacement we have to handle here. + if (Op3.isGlobalAddress()) { + DispForReloc = &Op3; + } else if (Op3.isConstantPoolIndex()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal = 1; + } + } else if (Op3.isJumpTableIndex()) { + if (Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal = 1; + } + } else { + DispVal = 1; + } + + const MachineOperand &Base = MI.getOperand(Op); + const MachineOperand &IndexReg = MI.getOperand(Op+2); + + unsigned BaseReg = Base.getReg(); + + // Is a SIB byte needed? 
+ if (IndexReg.getReg() == 0 && + (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { + if (BaseReg == 0) { // Just a displacement? + // Emit special case [disp32] encoding + ++FinalSize; + FinalSize += getDisplacementFieldSize(DispForReloc); + } else { + unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + // Emit simple indirect register encoding... [EAX] f.e. + ++FinalSize; + // Be pessimistic and assume it's a disp32, not a disp8 + } else { + // Emit the most general non-SIB encoding: [REG+disp32] + ++FinalSize; + FinalSize += getDisplacementFieldSize(DispForReloc); + } + } + + } else { // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + if (BaseReg == 0 || DispForReloc) { + // Emit the normal disp32 encoding. + ++FinalSize; + ForceDisp32 = true; + } else { + ++FinalSize; + } + + FinalSize += sizeSIBByte(); + + // Do we need to output a displacement? + if (DispVal != 0 || ForceDisp32) { + FinalSize += getDisplacementFieldSize(DispForReloc); + } + } + return FinalSize; +} + + +static unsigned GetInstSizeWithDesc(const MachineInstr &MI, + const TargetInstrDesc *Desc, + bool IsPIC, bool Is64BitMode) { + + unsigned Opcode = Desc->Opcode; + unsigned FinalSize = 0; + + // Emit the lock opcode prefix as needed. + if (Desc->TSFlags & X86II::LOCK) ++FinalSize; + + // Emit the repeat opcode prefix as needed. + if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; + + // Emit the operand size opcode prefix as needed. + if (Desc->TSFlags & X86II::OpSize) ++FinalSize; + + // Emit the address size opcode prefix as needed. + if (Desc->TSFlags & X86II::AdSize) ++FinalSize; + + bool Need0FPrefix = false; + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::REP: break; // already handled. + case X86II::XS: // F3 0F + ++FinalSize; + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + ++FinalSize; + Need0FPrefix = true; + break; + case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: + case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: + ++FinalSize; + break; // Two-byte opcode prefix + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + } + + if (Is64BitMode) { + // REX prefix + unsigned REX = X86InstrInfo::determineREX(MI); + if (REX) + ++FinalSize; + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + ++FinalSize; + + switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::T8: // 0F 38 + ++FinalSize; + break; + case X86II::TA: // 0F 3A + ++FinalSize; + break; + } + + // If this is a two-address instruction, skip one of the register operands. + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + CurOp++; + + switch (Desc->TSFlags & X86II::FormMask) { + default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + case X86II::Pseudo: + // Remember the current PC offset, this is the PIC relocation + // base address. 
+ switch (Opcode) { + default: + break; + case TargetInstrInfo::INLINEASM: { + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo(); + FinalSize += AI->getInlineAsmLength(AsmStr); + break; + } + case TargetInstrInfo::LABEL: + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case X86::DWARF_LOC: + case X86::FP_REG_KILL: + break; + case X86::MOVPC32r: { + // This emits the "call" portion of this pseudo instruction. + ++FinalSize; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + break; + } + } + CurOp = NumOps; + break; + case X86II::RawFrm: + ++FinalSize; + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + if (MO.isMachineBasicBlock()) { + FinalSize += sizePCRelativeBlockAddress(); + } else if (MO.isGlobalAddress()) { + FinalSize += sizeGlobalAddress(false); + } else if (MO.isExternalSymbol()) { + FinalSize += sizeExternalSymbolAddress(false); + } else if (MO.isImmediate()) { + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } else { + assert(0 && "Unknown RawFrm operand!"); + } + } + break; + + case X86II::AddRegFrm: + ++FinalSize; + ++CurOp; + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImmediate()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64ri) + dword = true; + if (MO1.isGlobalAddress()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO1.isExternalSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO1.isConstantPoolIndex()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO1.isJumpTableIndex()) + FinalSize += sizeJumpTableAddress(dword); + } + } + break; + + case X86II::MRMDestReg: { + ++FinalSize; + FinalSize += sizeRegModRMByte(); + CurOp += 2; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + case X86II::MRMDestMem: { + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); + CurOp += 5; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + + case X86II::MRMSrcReg: + ++FinalSize; + FinalSize += sizeRegModRMByte(); + CurOp += 2; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + + case X86II::MRMSrcMem: { + + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); + CurOp += 5; + if (CurOp != NumOps) { + ++CurOp; + FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + } + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + ++FinalSize; + ++CurOp; + FinalSize += sizeRegModRMByte(); + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImmediate()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64ri32) + dword = true; + if (MO1.isGlobalAddress()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO1.isExternalSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO1.isConstantPoolIndex()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO1.isJumpTableIndex()) + FinalSize += 
sizeJumpTableAddress(dword); + } + } + break; + + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + + ++FinalSize; + FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); + CurOp += 4; + + if (CurOp != NumOps) { + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO.isImmediate()) + FinalSize += sizeConstant(Size); + else { + bool dword = false; + if (Opcode == X86::MOV64mi32) + dword = true; + if (MO.isGlobalAddress()) { + FinalSize += sizeGlobalAddress(dword); + } else if (MO.isExternalSymbol()) + FinalSize += sizeExternalSymbolAddress(dword); + else if (MO.isConstantPoolIndex()) + FinalSize += sizeConstPoolAddress(dword); + else if (MO.isJumpTableIndex()) + FinalSize += sizeJumpTableAddress(dword); + } + } + break; + } + + case X86II::MRMInitReg: + ++FinalSize; + // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). + FinalSize += sizeRegModRMByte(); + ++CurOp; + break; + } + + if (!Desc->isVariadic() && CurOp != NumOps) { + cerr << "Cannot determine size: "; + MI.dump(); + cerr << '\n'; + abort(); + } + + + return FinalSize; +} + + +unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const TargetInstrDesc &Desc = MI->getDesc(); + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); + unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); + if (Desc.getOpcode() == X86::MOVPC32r) { + Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); + } + return Size; +}
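
For readers decoding the bit constants in determineREX above: the values it ORs together follow the fixed x86-64 REX prefix layout 0100WRXB, where bit 3 (REX.W) selects 64-bit operand size, bit 2 (REX.R) extends the ModRM.reg field, bit 1 (REX.X) extends the SIB index, and bit 0 (REX.B) extends ModRM.rm or the SIB base. A minimal standalone sketch of that composition follows; it is an illustration only (buildREX is a hypothetical helper, not part of this patch or of LLVM):

    // REX = 0100 W R X B
    //   W (bit 3): 64-bit operand size          -> the X86II::REX_W check
    //   R (bit 2): extends ModRM.reg            -> extended destination register
    //   X (bit 1): extends SIB.index
    //   B (bit 0): extends ModRM.rm / SIB.base  -> extended source/base register
    #include <cstdint>
    #include <cstdio>

    static uint8_t buildREX(bool W, bool R, bool X, bool B) {
      return 0x40 | (uint8_t(W) << 3) | (uint8_t(R) << 2)
                  | (uint8_t(X) << 1) |  uint8_t(B);
    }

    int main() {
      // A 64-bit operation whose ModRM.reg operand is one of r8-r15
      // needs REX.W and REX.R, i.e. the byte 0x4C.
      std::printf("REX = 0x%02X\n", buildREX(true, true, false, false));
      return 0;
    }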
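
The size estimator added here (GetInstSizeWithDesc and its helpers) works by summing fixed-size encoding fields: prefix bytes, the opcode, an optional ModRM byte, an optional SIB byte, a displacement that is pessimistically assumed to be a 4-byte disp32 whenever one is present, and an immediate whose width comes from the Imm8/Imm16/Imm32/Imm64 TSFlags. The toy model below restates that accounting outside of LLVM; the struct and its field names are illustrative assumptions, not the patch's API:

    #include <cstdio>

    // Worst-case x86 instruction size = sum of its encoding fields,
    // mirroring the pessimistic assumptions made by the estimator
    // (a displacement is always counted as disp32, never disp8).
    struct SizeModel {
      unsigned Prefixes;      // lock/rep/opsize/adsize/0F escape bytes
      unsigned Opcode;        // usually 1 byte
      unsigned ModRM;         // 1 byte when a reg/mem operand is encoded
      unsigned SIB;           // 1 byte for indexed or ESP-based addressing
      unsigned Displacement;  // 4 bytes assumed (disp32)
      unsigned Immediate;     // 0/1/2/4/8 bytes from the ImmMask flags
      unsigned total() const {
        return Prefixes + Opcode + ModRM + SIB + Displacement + Immediate;
      }
    };

    int main() {
      // An MRMDestMem form with a 32-bit immediate, no prefixes and no SIB:
      // 1 (opcode) + 1 (ModRM) + 4 (disp32) + 4 (imm32) = 10 bytes.
      SizeModel MI{0, 1, 1, 0, 4, 4};
      std::printf("estimated size: %u bytes\n", MI.total());
      return 0;
    }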