X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86CodeEmitter.cpp;h=c3734a2ee32ef08d075793aa1dc6d97ee6290500;hb=242c9f4615feeee2fbdd1f29cd9a8e8ffd43c075;hp=09524fe2e4726fd3a4899ccbe5ad0ed43ebd07a3;hpb=ff72e74d34197cd8775ed32c6e06054b936a1cad;p=oota-llvm.git diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 09524fe2e47..c3734a2ee32 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -13,24 +13,23 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "x86-emitter" +#include "X86.h" #include "X86InstrInfo.h" #include "X86JITInfo.h" +#include "X86Relocations.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "X86Relocations.h" -#include "X86.h" -#include "llvm/LLVMContext.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -43,7 +42,7 @@ namespace { template class Emitter : public MachineFunctionPass { const X86InstrInfo *II; - const TargetData *TD; + const DataLayout *TD; X86TargetMachine &TM; CodeEmitter &MCE; MachineModuleInfo *MMI; @@ -53,12 +52,12 @@ namespace { public: static char ID; explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(ID), II(0), TD(0), TM(tm), + : MachineFunctionPass(ID), II(0), TD(0), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} Emitter(X86TargetMachine &tm, CodeEmitter &mce, - const X86InstrInfo &ii, const TargetData &td, bool is64) - : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), + const X86InstrInfo &ii, const DataLayout &td, bool is64) + : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(is64), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -81,7 +80,7 @@ namespace { const MachineInstr &MI) const; void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); @@ -110,6 +109,14 @@ namespace { void emitMemModRMByte(const MachineInstr &MI, unsigned Op, unsigned RegOpcodeField, intptr_t PCAdj = 0); + + unsigned getX86RegNum(unsigned RegNo) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + return TRI->getEncodingValue(RegNo) & 0x7; + } + + unsigned char getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const; }; template @@ -117,7 +124,7 @@ template } // end anonymous namespace. /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code -/// to the specified templated MachineCodeEmitter object. +/// to the specified JITCodeEmitter object. FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter(TM, JCE); @@ -127,17 +134,16 @@ template bool Emitter::runOnMachineFunction(MachineFunction &MF) { MMI = &getAnalysis(); MCE.setModuleInfo(MMI); - + II = TM.getInstrInfo(); - TD = TM.getTargetData(); + TD = TM.getDataLayout(); Is64BitMode = TM.getSubtarget().is64Bit(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; - + do { - DEBUG(dbgs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); @@ -161,18 +167,18 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { static unsigned determineREX(const MachineInstr &MI) { unsigned REX = 0; const MCInstrDesc &Desc = MI.getDesc(); - + // Pseudo instructions do not need REX prefix byte. if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; if (Desc.TSFlags & X86II::REX_W) REX |= 1 << 3; - + unsigned NumOps = Desc.getNumOperands(); if (NumOps) { bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; - + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; for (unsigned e = NumOps; i != e; ++i) { @@ -183,7 +189,7 @@ static unsigned determineREX(const MachineInstr &MI) { REX |= 0x40; } } - + switch (Desc.TSFlags & X86II::FormMask) { case X86II::MRMInitReg: if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) @@ -365,7 +371,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, template void Emitter::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){ - MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg))); + MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); } template @@ -374,7 +380,7 @@ void Emitter::emitRegModRMByte(unsigned RegOpcodeFld) { } template -void Emitter::emitSIBByte(unsigned SS, +void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) { // SIB byte is in the same format as the ModRMByte... @@ -390,8 +396,8 @@ void Emitter::emitConstant(uint64_t Val, unsigned Size) { } } -/// isDisp8 - Return true if this signed displacement fits in a 8-bit -/// sign-extended field. +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. static bool isDisp8(int Value) { return Value == (signed char)Value; } @@ -400,10 +406,10 @@ static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, const TargetMachine &TM) { // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer // mechanism as 32-bit mode. - if (TM.getSubtarget().is64Bit() && + if (TM.getSubtarget().is64Bit() && !TM.getSubtarget().isTargetDarwin()) return false; - + // Return true if this is a reference to a stub containing the address of the // global, not the global itself. return isGlobalStubReference(GVOp.getTargetFlags()); @@ -429,7 +435,7 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp, if (RelocOp->isGlobal()) { // In 64-bit static small code model, we could potentially emit absolute. // But it's probably not beneficial. If the MCE supports using RIP directly - // do it, otherwise fallback to absolute (this is determined by IsPCRel). + // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); @@ -453,7 +459,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, const MachineOperand &Op3 = MI.getOperand(Op+3); int DispVal = 0; const MachineOperand *DispForReloc = 0; - + // Figure out what sort of displacement we have to handle here. if (Op3.isGlobal()) { DispForReloc = &Op3; @@ -481,7 +487,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, const MachineOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - + // Handle %rip relative addressing. if (BaseReg == X86::RIP || (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode @@ -498,15 +504,15 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, bool IsPCRel = MCE.earlyResolveAddresses() ? true : false; // Is a SIB byte needed? - // If no BaseReg, issue a RIP relative instruction only if the MCE can + // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. unsigned BaseRegNo = -1U; if (BaseReg != 0 && BaseReg != X86::RIP) - BaseRegNo = X86_MC::getX86RegNum(BaseReg); + BaseRegNo = getX86RegNum(BaseReg); if (// The SIB byte must be used if there is an index register. - IndexReg.getReg() == 0 && + IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. @@ -520,7 +526,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, emitDisplacementField(DispForReloc, DispVal, PCAdj, true); return; } - + // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -529,20 +535,20 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); return; } - + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (!DispForReloc && isDisp8(DispVal)) { MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); emitConstant(DispVal, 1); return; } - + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); return; } - + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); @@ -575,19 +581,19 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel + // Handle the SIB byte for the case where there is no base, see Intel // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); + IndexRegNo = getX86RegNum(IndexReg.getReg()); else // Examples: [ESP+1*+4] or [scaled idx]+disp32 (MOD=0,BASE=5) IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { - unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg); + unsigned BaseRegNo = getX86RegNum(BaseReg); unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); + IndexRegNo = getX86RegNum(IndexReg.getReg()); else IndexRegNo = 4; // For example [ESP+1*+4] emitSIBByte(SS, IndexRegNo, BaseRegNo); @@ -749,10 +755,6 @@ void Emitter::emitOpcodePrefix(uint64_t TSFlags, } } -static unsigned GetX86RegNum(const MachineOperand &MO) { - return X86_MC::getX86RegNum(MO.getReg()); -} - // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range // 0-7 and the difference between the 2 groups is given by the REX prefix. // In the VEX prefix, registers are seen sequencially from 0-15 and encoded @@ -762,10 +764,12 @@ static unsigned GetX86RegNum(const MachineOperand &MO) { // VEX.VVVV => XMM9 => ~9 // // See table 4-35 of Intel AVX Programming Reference for details. -static unsigned char getVEXRegisterEncoding(const MachineInstr &MI, - unsigned OpNum) { +template +unsigned char +Emitter::getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const { unsigned SrcReg = MI.getOperand(OpNum).getReg(); - unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); + unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg()); if (X86II::isX86_64ExtendedReg(SrcReg)) SrcRegNum |= 8; @@ -812,6 +816,7 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -927,17 +932,18 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: // Duplicate register. @@ -1027,6 +1033,10 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1037,9 +1047,15 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1118,11 +1134,14 @@ void Emitter::emitInstruction(MachineInstr &MI, // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } uint64_t TSFlags = Desc->TSFlags; @@ -1132,6 +1151,7 @@ void Emitter::emitInstruction(MachineInstr &MI, bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + const unsigned MemOp4_I8IMMOperand = 2; // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); @@ -1150,16 +1170,15 @@ void Emitter::emitInstruction(MachineInstr &MI, // Remember the current PC offset, this is the PIC relocation // base address. switch (Opcode) { - default: + default: llvm_unreachable("pseudo instructions should be removed before code" " emission"); - break; // Do nothing for Int_MemBarrier - it's just a comment. Add a debug // to make it slightly easier to see. case X86::Int_MemBarrier: DEBUG(dbgs() << "#MEMBARRIER\n"); break; - + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -1171,7 +1190,7 @@ void Emitter::emitInstruction(MachineInstr &MI, case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; - + case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: break; @@ -1193,7 +1212,7 @@ void Emitter::emitInstruction(MachineInstr &MI, if (CurOp == NumOps) break; - + const MachineOperand &MO = MI.getOperand(CurOp++); DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); @@ -1206,13 +1225,13 @@ void Emitter::emitInstruction(MachineInstr &MI, emitPCRelativeBlockAddress(MO.getMBB()); break; } - + if (MO.isGlobal()) { emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, MO.getOffset(), 0); break; } - + if (MO.isSymbol()) { emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); break; @@ -1223,7 +1242,7 @@ void Emitter::emitInstruction(MachineInstr &MI, emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); break; } - + assert(MO.isImm() && "Unknown RawFrm operand!"); if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls. @@ -1234,24 +1253,24 @@ void Emitter::emitInstruction(MachineInstr &MI, emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); break; } - + case X86II::AddRegFrm: { MCE.emitByte(BaseOpcode + - X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); - + getX86RegNum(MI.getOperand(CurOp++).getReg())); + if (CurOp == NumOps) break; - + const MachineOperand &MO1 = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri64i32) + if (Opcode == X86::MOV32ri64) rt = X86::reloc_absolute_word; // FIXME: add X86II flag? // This should not occur on Darwin for relocatable objects. if (Opcode == X86::MOV64ri) @@ -1271,12 +1290,14 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { @@ -1286,11 +1307,8 @@ void Emitter::emitInstruction(MachineInstr &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) SrcRegNum++; emitMemModRMByte(MI, CurOp, - X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); CurOp = SrcRegNum + 1; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -1299,20 +1317,17 @@ void Emitter::emitInstruction(MachineInstr &MI, unsigned SrcRegNum = CurOp+1; if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) - SrcRegNum++; + ++SrcRegNum; - if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) - SrcRegNum++; + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); - // 2 operands skipped with HasMemOp4, comensate accordingly + getX86RegNum(MI.getOperand(CurOp).getReg())); + // 2 operands skipped with HasMemOp4, compensate accordingly CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); break; } case X86II::MRMSrcMem: { @@ -1322,7 +1337,7 @@ void Emitter::emitInstruction(MachineInstr &MI, ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). } - if(HasMemOp4) // Skip second register source (encoded in I8IMM) + if (HasMemOp4) // Skip second register source (encoded in I8IMM) ++FirstMemOp; MCE.emitByte(BaseOpcode); @@ -1330,13 +1345,10 @@ void Emitter::emitInstruction(MachineInstr &MI, intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? X86II::getSizeOfImm(Desc->TSFlags) : 0; emitMemModRMByte(MI, FirstMemOp, - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); + getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); CurOp += AddrOperands + 1; if (HasVEX_4VOp3) ++CurOp; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -1345,21 +1357,21 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). - CurOp++; + ++CurOp; MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp++).getReg(), (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; - + const MachineOperand &MO1 = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (Opcode == X86::MOV64ri32) @@ -1382,9 +1394,9 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: { if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). - CurOp++; + ++CurOp; intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? - (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? + (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); @@ -1394,14 +1406,14 @@ void Emitter::emitInstruction(MachineInstr &MI, if (CurOp == NumOps) break; - + const MachineOperand &MO = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) { emitConstant(MO.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (Opcode == X86::MOV64mi32) @@ -1423,10 +1435,10 @@ void Emitter::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). emitRegModRMByte(MI.getOperand(CurOp).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); + getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; - + case X86II::MRM_C1: MCE.emitByte(BaseOpcode); MCE.emitByte(0xC1); @@ -1439,6 +1451,14 @@ void Emitter::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); MCE.emitByte(0xC9); break; + case X86II::MRM_CA: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCA); + break; + case X86II::MRM_CB: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCB); + break; case X86II::MRM_E8: MCE.emitByte(BaseOpcode); MCE.emitByte(0xE8); @@ -1449,6 +1469,33 @@ void Emitter::emitInstruction(MachineInstr &MI, break; } + while (CurOp != NumOps && NumOps - CurOp <= 2) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte. + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { + const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand + : CurOp); + ++CurOp; + unsigned RegNum = getX86RegNum(MO.getReg()) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if (CurOp != NumOps) { + const MachineOperand &MIMM = MI.getOperand(CurOp++); + if (MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } + emitConstant(RegNum, 1); + } else { + emitConstant(MI.getOperand(CurOp++).getImm(), + X86II::getSizeOfImm(Desc->TSFlags)); + } + } + if (!MI.isVariadic() && CurOp != NumOps) { #ifndef NDEBUG dbgs() << "Cannot encode all operands of: " << MI << "\n";