From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Sat, 5 Jun 2010 03:53:24 +0000 (+0000)
Subject: Initial AVX support for some instructions. No patterns matched
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3eca98bb3ab1ec27ab8763298c416d282cdaa261;p=oota-llvm.git

Initial AVX support for some instructions. No patterns matched
yet, only assembly encoding support.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105521 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index ca551e586a5..6379459488f 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -244,7 +244,7 @@ class Instruction {
   string DisableEncoding = "";
 
   /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
-  bits<32> TSFlags = 0;
+  bits<64> TSFlags = 0;
 }
 
 /// Predicates - These are extra conditionals which are turned into instruction
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index adc37e16e45..a004f95ebc5 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_TARGET_TARGETINSTRDESC_H
 #define LLVM_TARGET_TARGETINSTRDESC_H
 
+#include "llvm/System/DataTypes.h"
+
 namespace llvm {
 
 class TargetRegisterClass;
@@ -131,7 +133,7 @@ public:
   unsigned short  SchedClass;    // enum identifying instr sched class
   const char *    Name;          // Name of the instruction record in td file
   unsigned        Flags;         // Flags identifying machine instr class
-  unsigned        TSFlags;       // Target Specific Flag values
+  uint64_t        TSFlags;       // Target Specific Flag values
   const unsigned *ImplicitUses;  // Registers implicitly read by this instr
   const unsigned *ImplicitDefs;  // Registers implicitly defined by this instr
   const TargetRegisterClass **RCBarriers; // Reg classes completely "clobbered"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c5fdf8f967f..01095626a99 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 
   MachineInstr *MI = MBBI;
   MachineFunction &MF = *MI->getParent()->getParent();
-  unsigned TSFlags = MI->getDesc().TSFlags;
+  uint64_t TSFlags = MI->getDesc().TSFlags;
   bool isPre = false;
   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
   default: return NULL;
@@ -488,7 +488,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
 
   // Basic size info comes from the TSFlags field.
   const TargetInstrDesc &TID = MI->getDesc();
-  unsigned TSFlags = TID.TSFlags;
+  uint64_t TSFlags = TID.TSFlags;
 
   unsigned Opc = MI->getOpcode();
   switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index b1d90df3417..7d21256a14f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
 /// Various utilities for checking the target specific flags.
 
 /// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(unsigned TSFlags) {
+static inline bool isUnaryDP(uint64_t TSFlags) {
   return (TSFlags & ARMII::UnaryDP);
 }
 
 /// This four-bit field describes the addressing mode used.
 /// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(unsigned TSFlags) {
+static inline unsigned getAddrMode(uint64_t TSFlags) {
   return (TSFlags & ARMII::AddrModeMask);
 }
 
 /// {IndexModePre, IndexModePost}
 /// Only valid for load and store ops.
 /// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(unsigned TSFlags) {
+static inline unsigned getIndexMode(uint64_t TSFlags) {
   return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
 }
 
 /// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(unsigned TSFlags) {
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
   return (TSFlags & ARMII::IndexModeMask) != 0;
 }
 
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 66dfd4b7379..db11fdeb7c1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
   isLoad  = TID.mayLoad();
   isStore = TID.mayStore();
   
-  unsigned TSFlags = TID.TSFlags;
+  uint64_t TSFlags = TID.TSFlags;
   
   isFirst   = TSFlags & PPCII::PPC970_First;
   isSingle  = TSFlags & PPCII::PPC970_Single;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 93460ef308c..5fe21ac8a19 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -232,7 +232,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
 
   for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
     MachineInstr *MI = I;
-    unsigned Flags = MI->getDesc().TSFlags;
+    uint64_t Flags = MI->getDesc().TSFlags;
     
     unsigned FPInstClass = Flags & X86II::FPTypeMask;
     if (MI->isInlineAsm())
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c4522f3fd9e..1c4301c7038 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -83,6 +83,7 @@ def SSEPackedInt    : Domain<3>;
 class OpSize { bit hasOpSizePrefix = 1; }
 class AdSize { bit hasAdSizePrefix = 1; }
 class REX_W  { bit hasREX_WPrefix = 1; }
+class VEX_4V { bit hasVEX_4VPrefix = 1; }
 class LOCK   { bit hasLockPrefix = 1; }
 class SegFS  { bits<2> SegOvrBits = 1; }
 class SegGS  { bits<2> SegOvrBits = 2; }
@@ -124,6 +125,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
 
   bits<4> Prefix = 0;       // Which prefix byte does this inst have?
   bit hasREX_WPrefix  = 0;  // Does this inst requires the REX.W prefix?
+  bit hasVEX_4VPrefix  = 0;  // Does this inst requires the VEX.VVVV prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
@@ -141,6 +143,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{21-20} = SegOvrBits;
   let TSFlags{23-22} = ExeDomain.Value;
   let TSFlags{31-24} = Opcode;
+  let TSFlags{32}    = hasVEX_4VPrefix;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -216,6 +219,7 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
 //   SSI   - SSE1 instructions with XS prefix.
 //   PSI   - SSE1 instructions with TB prefix.
 //   PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+//   VSSI  - SSE1 instructions with XS prefix in AVX form.
 
 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
@@ -229,6 +233,10 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
         Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
+        Requires<[HasAVX, HasSSE1]>;
 
 // SSE2 Instruction Templates:
 // 
@@ -237,6 +245,7 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 //   SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
 //   PDI    - SSE2 instructions with TB and OpSize prefixes.
 //   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+//   VSDI   - SSE2 instructions with XD prefix in AVX form.
 
 class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
@@ -253,6 +262,10 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
         Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
+        Requires<[HasAVX, HasSSE2]>;
 
 // SSE3 Instruction Templates:
 // 
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f5c802288d4..9016c16ec43 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -417,22 +417,36 @@ namespace X86II {
 
     OpcodeShift   = 24,
     OpcodeMask    = 0xFF << OpcodeShift
+
   };
   
+  // FIXME: The enum bit space above is exhausted and more bits are needed.
+  // Wherever the enums below are used, shift TSFlags right by 32 first.
+  enum {
+    //===------------------------------------------------------------------===//
+    // VEX_4V - VEX prefixes are instruction prefixes used in AVX.
+    // VEX_4V is used to specify an additional AVX/SSE register. Several 2
+    // address instructions in SSE are represented as 3 address ones in AVX
+    // and the additional register is encoded in VEX_VVVV prefix.
+    //
+    VEXShift    = 0,
+    VEX_4V      = 1 << VEXShift
+  };
+
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
-  static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) {
+  static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
     return TSFlags >> X86II::OpcodeShift;
   }
   
-  static inline bool hasImm(unsigned TSFlags) {
+  static inline bool hasImm(uint64_t TSFlags) {
     return (TSFlags & X86II::ImmMask) != 0;
   }
   
   /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
   /// of the specified instruction.
-  static inline unsigned getSizeOfImm(unsigned TSFlags) {
+  static inline unsigned getSizeOfImm(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
     default: assert(0 && "Unknown immediate size");
     case X86II::Imm8:
@@ -446,7 +460,7 @@ namespace X86II {
   
   /// isImmPCRel - Return true if the immediate of the specified instruction's
   /// TSFlags indicates that it is pc relative.
-  static inline unsigned isImmPCRel(unsigned TSFlags) {
+  static inline unsigned isImmPCRel(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
       default: assert(0 && "Unknown immediate size");
       case X86II::Imm8PCRel:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 579e332bcf3..32358a3ab8b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -673,6 +673,26 @@ multiclass basic_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
     let isCommutable = Commutable;
   }
 
+  def V#NAME#SSrr : VSSI<opc, MRMSrcReg, (outs FR32:$dst),
+                 (ins FR32:$src1, FR32:$src2),
+                 !strconcat(OpcodeStr,
+                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
+  def V#NAME#SDrr : VSDI<opc, MRMSrcReg, (outs FR64:$dst),
+                 (ins FR64:$src1, FR64:$src2),
+                 !strconcat(OpcodeStr,
+                            "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
   // Scalar operation, reg+mem.
   def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                                  (ins FR32:$src1, f32mem:$src2),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a9681e6670f..f97ac2fe482 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -101,12 +101,19 @@ public:
   
   void EmitMemModRMByte(const MCInst &MI, unsigned Op,
                         unsigned RegOpcodeField, 
-                        unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+                        uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups) const;
   
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const;
   
+  void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                           const MCInst &MI, const TargetInstrDesc &Desc,
+                           raw_ostream &OS) const;
+
+  void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                        const MCInst &MI, const TargetInstrDesc &Desc,
+                        raw_ostream &OS) const;
 };
 
 } // end anonymous namespace
@@ -133,7 +140,7 @@ static bool isDisp8(int Value) {
 
 /// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
 /// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
   unsigned Size = X86II::getSizeOfImm(TSFlags);
   bool isPCRel = X86II::isImmPCRel(TSFlags);
   
@@ -184,7 +191,7 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
 
 void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
                                         unsigned RegOpcodeField,
-                                        unsigned TSFlags, unsigned &CurByte,
+                                        uint64_t TSFlags, unsigned &CurByte,
                                         raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups) const{
   const MCOperand &Disp     = MI.getOperand(Op+3);
@@ -324,10 +331,159 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
     EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
 }
 
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX. Emits the 2 byte (C5) form when it suffices, else 3 bytes (C4).
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                            const MCInst &MI, const TargetInstrDesc &Desc,
+                            raw_ostream &OS) const {
+
+  // Pseudo instructions never have a VEX prefix.
+  if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+    return;
+
+  // VEX_R: opcode extension equivalent to REX.R in
+  // 1's complement (inverted) form
+  //
+  //  1: Same as REX_R=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX_R=1 (64 bit mode only)
+  //
+  unsigned char VEX_R = 0x1;
+
+  // VEX_B:
+  //
+  //  1: Same as REX_B=0 (ignored in 32-bit mode)
+  //  0: Same as REX_B=1 (64 bit mode only)
+  //
+  unsigned char VEX_B = 0x1;
+
+  // VEX_W: opcode specific (use like REX.W, or used for
+  // opcode extension, or ignored, depending on the opcode byte)
+  unsigned char VEX_W = 0;
+
+  // VEX_5M (VEX m-mmmmm field):
+  //
+  //  0b00000: Reserved for future use
+  //  0b00001: implied 0F leading opcode
+  //  0b00010: implied 0F 38 leading opcode bytes
+  //  0b00011: implied 0F 3A leading opcode bytes
+  //  0b00100-0b11111: Reserved for future use
+  //
+  unsigned char VEX_5M = 0x1;
+
+  // VEX_4V (VEX vvvv field): a register specifier
+  // (in 1's complement form) or 1111 if unused.
+  unsigned char VEX_4V = 0xf;
+
+  // VEX_L (Vector Length):
+  //
+  //  0: scalar or 128-bit vector
+  //  1: 256-bit vector
+  //
+  unsigned char VEX_L = 0;
+
+  // VEX_PP: opcode extension providing equivalent
+  // functionality of a SIMD prefix
+  //
+  //  0b00: None
+  //  0b01: 66 (not handled yet)
+  //  0b10: F3
+  //  0b11: F2
+  //
+  unsigned char VEX_PP = 0;
+
+  switch (TSFlags & X86II::Op0Mask) {
+  default: assert(0 && "Invalid prefix!");
+  case 0: break;  // No prefix!
+  case X86II::T8:  // 0F 38
+    VEX_5M = 0x2;
+    break;
+  case X86II::TA:  // 0F 3A
+    VEX_5M = 0x3;
+    break;
+  case X86II::TF:  // F2 0F 38
+    VEX_PP = 0x3;
+    VEX_5M = 0x2;
+    break;
+  case X86II::XS:  // F3 0F
+    VEX_PP = 0x2;
+    break;
+  case X86II::XD:  // F2 0F
+    VEX_PP = 0x3;
+    break;
+  }
+
+  unsigned NumOps = MI.getNumOperands();
+  unsigned i = 0;
+  unsigned SrcReg = 0, SrcRegNum = 0;
+
+  switch (TSFlags & X86II::FormMask) {
+  case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+  case X86II::MRMSrcReg:
+    if (MI.getOperand(0).isReg() &&
+        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+      VEX_R = 0x0;
+
+    // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
+    // range 0-7 and the difference between the 2 groups is given by the
+    // REX prefix. In the VEX prefix, registers are seen sequentially
+    // from 0-15 and encoded in 1's complement form, example:
+    //
+    //  ModRM field => XMM9 => 1
+    //  VEX.VVVV    => XMM9 => ~9
+    //
+    // See table 4-35 of Intel AVX Programming Reference for details.
+    SrcReg = MI.getOperand(1).getReg();
+    SrcRegNum = GetX86RegNum(MI.getOperand(1));
+    if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    if ((TSFlags >> 32) & X86II::VEX_4V)
+      VEX_4V = (~SrcRegNum) & 0xf;
+
+    i = 2; // Skip the VEX.VVVV operand.
+    for (; i != NumOps; ++i) {
+      const MCOperand &MO = MI.getOperand(i);
+      if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+        VEX_B = 0x0;
+    }
+    break;
+  default:
+    assert(0 && "Not implemented!");
+  }
+
+  // VEX opcode prefix can have 2 or 3 bytes
+  //
+  //  3 bytes:
+  //    +-----+ +--------------+ +-------------------+
+  //    | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+  //    +-----+ +--------------+ +-------------------+
+  //  2 bytes:
+  //    +-----+ +-------------------+
+  //    | C5h | | R | vvvv | L | pp |
+  //    +-----+ +-------------------+
+  //
+  // Note: VEX.X isn't used so far. The 2 byte form has no X, B, W or m-mmmm
+  // fields, so it is only valid when VEX.B=1, W=0 and m-mmmm=0b00001.
+  unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+  if (VEX_B && !VEX_W && VEX_5M == 0x1 /* && VEX_X */) { // 2 byte VEX prefix
+    EmitByte(0xC5, CurByte, OS);
+    EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+    return;
+  }
+
+  // 3 byte VEX prefix (VEX.X is unused, so its inverted bit is always 1)
+  EmitByte(0xC4, CurByte, OS);
+  EmitByte(VEX_R << 7 | 1 << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+  EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
 /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
 /// size, and 3) use of X86-64 extended registers.
-static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
                                    const TargetInstrDesc &Desc) {
   // Pseudo instructions never have a rex byte.
   if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -422,18 +578,10 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
   return REX;
 }
 
-void X86MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-                  SmallVectorImpl<MCFixup> &Fixups) const {
-  unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = TII.get(Opcode);
-  unsigned TSFlags = Desc.TSFlags;
-
-  // Keep track of the current byte being emitted.
-  unsigned CurByte = 0;
-  
-  // FIXME: We should emit the prefixes in exactly the same order as GAS does,
-  // in order to provide diffability.
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                            const MCInst &MI, const TargetInstrDesc &Desc,
+                            raw_ostream &OS) const {
 
   // Emit the lock opcode prefix as needed.
   if (TSFlags & X86II::LOCK)
@@ -516,6 +664,30 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     EmitByte(0x3A, CurByte, OS);
     break;
   }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = TII.get(Opcode);
+  uint64_t TSFlags = Desc.TSFlags;
+
+  // Keep track of the current byte being emitted.
+  unsigned CurByte = 0;
+  
+  // Is this instruction encoded in AVX form?
+  bool IsAVXForm = false;
+  if ((TSFlags >> 32) & X86II::VEX_4V)
+    IsAVXForm = true;
+
+  // FIXME: We should emit the prefixes in exactly the same order as GAS does,
+  // in order to provide diffability.
+
+  if (!IsAVXForm)
+    EmitOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
+  else
+    EmitVEXOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
   
   // If this is a two-address instruction, skip one of the register operands.
   unsigned NumOps = Desc.getNumOperands();
@@ -527,6 +699,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     --NumOps;
   
   unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+  unsigned SrcRegNum = 0;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg:
     assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
@@ -558,9 +731,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       
   case X86II::MRMSrcReg:
     EmitByte(BaseOpcode, CurByte, OS);
-    EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)),
-                     CurByte, OS);
-    CurOp += 2;
+    SrcRegNum = CurOp + 1;
+
+    if (IsAVXForm) // Skip 1st src (which is encoded in VEX_VVVV)
+      SrcRegNum++;
+
+    EmitRegModRMByte(MI.getOperand(SrcRegNum),
+                     GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+    CurOp = SrcRegNum + 1;
     break;
     
   case X86II::MRMSrcMem: {
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index 994dbb85990..7f79b9d4dd5 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -10052,3 +10052,35 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK: ficomps 32493
 // CHECK:  encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00]
           ficomps 32493
+
+// CHECK: vaddss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x58,0xd4]
+          vaddss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x59,0xd4]
+          vmulss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x5c,0xd4]
+          vsubss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x5e,0xd4]
+          vdivss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x58,0xd4]
+          vaddsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x59,0xd4]
+          vmulsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x5c,0xd4]
+          vsubsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x5e,0xd4]
+          vdivsd  %xmm4, %xmm6, %xmm2
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 036f7d49da0..b2596086308 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -103,3 +103,35 @@ movd %mm1, %rdx
 // CHECK: movd %mm1, %edx
 // CHECK:  encoding: [0x0f,0x7e,0xca]
 movd %mm1, %edx
+
+// CHECK: vaddss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x58,0xd0]
+vaddss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x59,0xd0]
+vmulss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x5c,0xd0]
+vsubss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x5e,0xd0]
+vdivss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vaddsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x58,0xd0]
+vaddsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x59,0xd0]
+vmulsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x5c,0xd0]
+vsubsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x5e,0xd0]
+vdivsd  %xmm8, %xmm9, %xmm10
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index b7085ae6c7b..5fe2bfec1d4 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -212,6 +212,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   
   HasOpSizePrefix  = Rec->getValueAsBit("hasOpSizePrefix");
   HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix");
+  HasVEX_4VPrefix  = Rec->getValueAsBit("hasVEX_4VPrefix");
   HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix");
   IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
   
@@ -532,7 +533,12 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
            "Unexpected number of operands for MRMSrcRegFrm");
     HANDLE_OPERAND(roRegister)
     HANDLE_OPERAND(rmRegister)
-    HANDLE_OPTIONAL(immediate)
+
+    if (HasVEX_4VPrefix)
+      // FIXME: encoding of registers in AVX is in 1's complement form.
+      HANDLE_OPTIONAL(rmRegister)
+    else
+      HANDLE_OPTIONAL(immediate)
     break;
   case X86Local::MRMSrcMem:
     // Operand 1 is a register operand in the Reg/Opcode field.
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index 84374b000bf..db4d96dda03 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -52,6 +52,8 @@ private:
   bool HasOpSizePrefix;
   /// The hasREX_WPrefix field from the record
   bool HasREX_WPrefix;
+  /// The hasVEX_4VPrefix field from the record
+  bool HasVEX_4VPrefix;
   /// The hasLockPrefix field from the record
   bool HasLockPrefix;
   /// The isCodeGenOnly filed from the record