lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp

   1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file is part of the X86 Disassembler.
  11 // It contains the implementation of the instruction decoder.
  12 // Documentation for the disassembler can be found in X86Disassembler.h.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include <cstdarg>   /* for va_*()       */
  17 #include <cstdio>    /* for vsnprintf()  */
  18 #include <cstdlib>   /* for exit()       */
  19 #include <cstring>   /* for memset()     */
  20
  21 #include "X86DisassemblerDecoder.h"
  22
  23 using namespace llvm::X86Disassembler;
  24
  25 /// Specifies whether a ModR/M byte is needed and (if so) which
  26 /// instruction each possible value of the ModR/M byte corresponds to.  Once
  27 /// this information is known, we have narrowed down to a single instruction.
  28 struct ModRMDecision {
  29   uint8_t modrm_type;
  30   uint16_t instructionIDs;
  31 };
  32
  33 /// Specifies which set of ModR/M->instruction tables to look at
  34 /// given a particular opcode.
  35 struct OpcodeDecision {
  36   ModRMDecision modRMDecisions[256];
  37 };
  38
  39 /// Specifies which opcode->instruction tables to look at given
  40 /// a particular context (set of attributes).  Since there are many possible
  41 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context
  42 /// applies given a specific set of attributes.  Hence there are only IC_max
  43 /// entries in this table, rather than 2^(ATTR_max).
  44 struct ContextDecision {
  45   OpcodeDecision opcodeDecisions[IC_max];
  46 };
  47
  48 #include "X86GenDisassemblerTables.inc"
  49
  50 #ifndef NDEBUG
  51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
  52 #else
  53 #define debug(s) do { } while (0)
  54 #endif
  55
  56
  57 /*
  58  * contextForAttrs - Client for the instruction context table.  Takes a set of
  59  *   attributes and returns the appropriate decode context.
  60  *
  61  * @param attrMask  - Attributes, from the enumeration attributeBits.
  62  * @return          - The InstructionContext to use when looking up an
  63  *                    an instruction with these attributes.
  64  */
  65 static InstructionContext contextForAttrs(uint16_t attrMask) {
  66   return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
  67 }
  68
  69 /*
  70  * modRMRequired - Reads the appropriate instruction table to determine whether
  71  *   the ModR/M byte is required to decode a particular instruction.
  72  *
  73  * @param type        - The opcode type (i.e., how many bytes it has).
  74  * @param insnContext - The context for the instruction, as returned by
  75  *                      contextForAttrs.
  76  * @param opcode      - The last byte of the instruction's opcode, not counting
  77  *                      ModR/M extensions and escapes.
  78  * @return            - true if the ModR/M byte is required, false otherwise.
  79  */
  80 static int modRMRequired(OpcodeType type,
  81                          InstructionContext insnContext,
  82                          uint16_t opcode) {
  83   const struct ContextDecision* decision = nullptr;
  84
  85   switch (type) {
  86   case ONEBYTE:
  87     decision = &ONEBYTE_SYM;
  88     break;
  89   case TWOBYTE:
  90     decision = &TWOBYTE_SYM;
  91     break;
  92   case THREEBYTE_38:
  93     decision = &THREEBYTE38_SYM;
  94     break;
  95   case THREEBYTE_3A:
  96     decision = &THREEBYTE3A_SYM;
  97     break;
  98   case XOP8_MAP:
  99     decision = &XOP8_MAP_SYM;
 100     break;
 101   case XOP9_MAP:
 102     decision = &XOP9_MAP_SYM;
 103     break;
 104   case XOPA_MAP:
 105     decision = &XOPA_MAP_SYM;
 106     break;
 107   }
 108
 109   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
 110     modrm_type != MODRM_ONEENTRY;
 111 }
 112
 113 /*
 114  * decode - Reads the appropriate instruction table to obtain the unique ID of
 115  *   an instruction.
 116  *
 117  * @param type        - See modRMRequired().
 118  * @param insnContext - See modRMRequired().
 119  * @param opcode      - See modRMRequired().
 120  * @param modRM       - The ModR/M byte if required, or any value if not.
 121  * @return            - The UID of the instruction, or 0 on failure.
 122  */
 123 static InstrUID decode(OpcodeType type,
 124                        InstructionContext insnContext,
 125                        uint8_t opcode,
 126                        uint8_t modRM) {
 127   const struct ModRMDecision* dec = nullptr;
 128
 129   switch (type) {
 130   case ONEBYTE:
 131     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 132     break;
 133   case TWOBYTE:
 134     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 135     break;
 136   case THREEBYTE_38:
 137     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 138     break;
 139   case THREEBYTE_3A:
 140     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 141     break;
 142   case XOP8_MAP:
 143     dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 144     break;
 145   case XOP9_MAP:
 146     dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 147     break;
 148   case XOPA_MAP:
 149     dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
 150     break;
 151   }
 152
 153   switch (dec->modrm_type) {
 154   default:
 155     debug("Corrupt table!  Unknown modrm_type");
 156     return 0;
 157   case MODRM_ONEENTRY:
 158     return modRMTable[dec->instructionIDs];
 159   case MODRM_SPLITRM:
 160     if (modFromModRM(modRM) == 0x3)
 161       return modRMTable[dec->instructionIDs+1];
 162     return modRMTable[dec->instructionIDs];
 163   case MODRM_SPLITREG:
 164     if (modFromModRM(modRM) == 0x3)
 165       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
 166     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
 167   case MODRM_SPLITMISC:
 168     if (modFromModRM(modRM) == 0x3)
 169       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
 170     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
 171   case MODRM_FULL:
 172     return modRMTable[dec->instructionIDs+modRM];
 173   }
 174 }
 175
 176 /*
 177  * specifierForUID - Given a UID, returns the name and operand specification for
 178  *   that instruction.
 179  *
 180  * @param uid - The unique ID for the instruction.  This should be returned by
 181  *              decode(); specifierForUID will not check bounds.
 182  * @return    - A pointer to the specification for that instruction.
 183  */
 184 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
 185   return &INSTRUCTIONS_SYM[uid];
 186 }
 187
 188 /*
 189  * consumeByte - Uses the reader function provided by the user to consume one
 190  *   byte from the instruction's memory and advance the cursor.
 191  *
 192  * @param insn  - The instruction with the reader function to use.  The cursor
 193  *                for this instruction is advanced.
 194  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
 195  *                with the data read.
 196  * @return      - 0 if the read was successful; nonzero otherwise.
 197  */
 198 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
 199   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
 200
 201   if (!ret)
 202     ++(insn->readerCursor);
 203
 204   return ret;
 205 }
 206
 207 /*
 208  * lookAtByte - Like consumeByte, but does not advance the cursor.
 209  *
 210  * @param insn  - See consumeByte().
 211  * @param byte  - See consumeByte().
 212  * @return      - See consumeByte().
 213  */
 214 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
 215   return insn->reader(insn->readerArg, byte, insn->readerCursor);
 216 }
 217
 218 static void unconsumeByte(struct InternalInstruction* insn) {
 219   insn->readerCursor--;
 220 }
 221
 222 #define CONSUME_FUNC(name, type)                                  \
 223   static int name(struct InternalInstruction* insn, type* ptr) {  \
 224     type combined = 0;                                            \
 225     unsigned offset;                                              \
 226     for (offset = 0; offset < sizeof(type); ++offset) {           \
 227       uint8_t byte;                                               \
 228       int ret = insn->reader(insn->readerArg,                     \
 229                              &byte,                               \
 230                              insn->readerCursor + offset);        \
 231       if (ret)                                                    \
 232         return ret;                                               \
 233       combined = combined | ((uint64_t)byte << (offset * 8));     \
 234     }                                                             \
 235     *ptr = combined;                                              \
 236     insn->readerCursor += sizeof(type);                           \
 237     return 0;                                                     \
 238   }
 239
 240 /*
 241  * consume* - Use the reader function provided by the user to consume data
 242  *   values of various sizes from the instruction's memory and advance the
 243  *   cursor appropriately.  These readers perform endian conversion.
 244  *
 245  * @param insn    - See consumeByte().
 246  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
 247  *                  be populated with the data read.
 248  * @return        - See consumeByte().
 249  */
 250 CONSUME_FUNC(consumeInt8, int8_t)
 251 CONSUME_FUNC(consumeInt16, int16_t)
 252 CONSUME_FUNC(consumeInt32, int32_t)
 253 CONSUME_FUNC(consumeUInt16, uint16_t)
 254 CONSUME_FUNC(consumeUInt32, uint32_t)
 255 CONSUME_FUNC(consumeUInt64, uint64_t)
 256
 257 /*
 258  * dbgprintf - Uses the logging function provided by the user to log a single
 259  *   message, typically without a carriage-return.
 260  *
 261  * @param insn    - The instruction containing the logging function.
 262  * @param format  - See printf().
 263  * @param ...     - See printf().
 264  */
 265 static void dbgprintf(struct InternalInstruction* insn,
 266                       const char* format,
 267                       ...) {
 268   char buffer[256];
 269   va_list ap;
 270
 271   if (!insn->dlog)
 272     return;
 273
 274   va_start(ap, format);
 275   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
 276   va_end(ap);
 277
 278   insn->dlog(insn->dlogArg, buffer);
 279
 280   return;
 281 }
 282
 283 /*
 284  * setPrefixPresent - Marks that a particular prefix is present at a particular
 285  *   location.
 286  *
 287  * @param insn      - The instruction to be marked as having the prefix.
 288  * @param prefix    - The prefix that is present.
 289  * @param location  - The location where the prefix is located (in the address
 290  *                    space of the instruction's reader).
 291  */
 292 static void setPrefixPresent(struct InternalInstruction* insn,
 293                                     uint8_t prefix,
 294                                     uint64_t location)
 295 {
 296   insn->prefixPresent[prefix] = 1;
 297   insn->prefixLocations[prefix] = location;
 298 }
 299
 300 /*
 301  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
 302  *   present at a given location.
 303  *
 304  * @param insn      - The instruction to be queried.
 305  * @param prefix    - The prefix.
 306  * @param location  - The location to query.
 307  * @return          - Whether the prefix is at that location.
 308  */
 309 static bool isPrefixAtLocation(struct InternalInstruction* insn,
 310                                uint8_t prefix,
 311                                uint64_t location)
 312 {
 313   return insn->prefixPresent[prefix] == 1 &&
 314      insn->prefixLocations[prefix] == location;
 315 }
 316
 317 /*
 318  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
 319  *   instruction as having them.  Also sets the instruction's default operand,
 320  *   address, and other relevant data sizes to report operands correctly.
 321  *
 322  * @param insn  - The instruction whose prefixes are to be read.
 323  * @return      - 0 if the instruction could be read until the end of the prefix
 324  *                bytes, and no prefixes conflicted; nonzero otherwise.
 325  */
 326 static int readPrefixes(struct InternalInstruction* insn) {
 327   bool isPrefix = true;
 328   bool prefixGroups[4] = { false };
 329   uint64_t prefixLocation;
 330   uint8_t byte = 0;
 331   uint8_t nextByte;
 332
 333   bool hasAdSize = false;
 334   bool hasOpSize = false;
 335
 336   dbgprintf(insn, "readPrefixes()");
 337
 338   while (isPrefix) {
 339     prefixLocation = insn->readerCursor;
 340
 341     /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
 342     if (consumeByte(insn, &byte))
 343       break;
 344
 345     /*
 346      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
 347      * break and let it be disassembled as a normal "instruction".
 348      */
 349     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
 350       break;
 351
 352     if (insn->readerCursor - 1 == insn->startLocation
 353         && (byte == 0xf2 || byte == 0xf3)
 354         && !lookAtByte(insn, &nextByte))
 355     {
 356       /*
 357        * If the byte is 0xf2 or 0xf3, and any of the following conditions are
 358        * met:
 359        * - it is followed by a LOCK (0xf0) prefix
 360        * - it is followed by an xchg instruction
 361        * then it should be disassembled as a xacquire/xrelease not repne/rep.
 362        */
 363       if ((byte == 0xf2 || byte == 0xf3) &&
 364           ((nextByte == 0xf0) |
 365           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
 366         insn->xAcquireRelease = true;
 367       /*
 368        * Also if the byte is 0xf3, and the following condition is met:
 369        * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
 370        *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
 371        * then it should be disassembled as an xrelease not rep.
 372        */
 373       if (byte == 0xf3 &&
 374           (nextByte == 0x88 || nextByte == 0x89 ||
 375            nextByte == 0xc6 || nextByte == 0xc7))
 376         insn->xAcquireRelease = true;
 377       if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
 378         if (consumeByte(insn, &nextByte))
 379           return -1;
 380         if (lookAtByte(insn, &nextByte))
 381           return -1;
 382         unconsumeByte(insn);
 383       }
 384       if (nextByte != 0x0f && nextByte != 0x90)
 385         break;
 386     }
 387
 388     switch (byte) {
 389     case 0xf0:  /* LOCK */
 390     case 0xf2:  /* REPNE/REPNZ */
 391     case 0xf3:  /* REP or REPE/REPZ */
 392       if (prefixGroups[0])
 393         dbgprintf(insn, "Redundant Group 1 prefix");
 394       prefixGroups[0] = true;
 395       setPrefixPresent(insn, byte, prefixLocation);
 396       break;
 397     case 0x2e:  /* CS segment override -OR- Branch not taken */
 398     case 0x36:  /* SS segment override -OR- Branch taken */
 399     case 0x3e:  /* DS segment override */
 400     case 0x26:  /* ES segment override */
 401     case 0x64:  /* FS segment override */
 402     case 0x65:  /* GS segment override */
 403       switch (byte) {
 404       case 0x2e:
 405         insn->segmentOverride = SEG_OVERRIDE_CS;
 406         break;
 407       case 0x36:
 408         insn->segmentOverride = SEG_OVERRIDE_SS;
 409         break;
 410       case 0x3e:
 411         insn->segmentOverride = SEG_OVERRIDE_DS;
 412         break;
 413       case 0x26:
 414         insn->segmentOverride = SEG_OVERRIDE_ES;
 415         break;
 416       case 0x64:
 417         insn->segmentOverride = SEG_OVERRIDE_FS;
 418         break;
 419       case 0x65:
 420         insn->segmentOverride = SEG_OVERRIDE_GS;
 421         break;
 422       default:
 423         debug("Unhandled override");
 424         return -1;
 425       }
 426       if (prefixGroups[1])
 427         dbgprintf(insn, "Redundant Group 2 prefix");
 428       prefixGroups[1] = true;
 429       setPrefixPresent(insn, byte, prefixLocation);
 430       break;
 431     case 0x66:  /* Operand-size override */
 432       if (prefixGroups[2])
 433         dbgprintf(insn, "Redundant Group 3 prefix");
 434       prefixGroups[2] = true;
 435       hasOpSize = true;
 436       setPrefixPresent(insn, byte, prefixLocation);
 437       break;
 438     case 0x67:  /* Address-size override */
 439       if (prefixGroups[3])
 440         dbgprintf(insn, "Redundant Group 4 prefix");
 441       prefixGroups[3] = true;
 442       hasAdSize = true;
 443       setPrefixPresent(insn, byte, prefixLocation);
 444       break;
 445     default:    /* Not a prefix byte */
 446       isPrefix = false;
 447       break;
 448     }
 449
 450     if (isPrefix)
 451       dbgprintf(insn, "Found prefix 0x%hhx", byte);
 452   }
 453
 454   insn->vectorExtensionType = TYPE_NO_VEX_XOP;
 455
 456   if (byte == 0x62) {
 457     uint8_t byte1, byte2;
 458
 459     if (consumeByte(insn, &byte1)) {
 460       dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
 461       return -1;
 462     }
 463
 464     if (lookAtByte(insn, &byte2)) {
 465       dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
 466       return -1;
 467     }
 468
 469     if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
 470        ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
 471       insn->vectorExtensionType = TYPE_EVEX;
 472     } else {
 473       unconsumeByte(insn); /* unconsume byte1 */
 474       unconsumeByte(insn); /* unconsume byte  */
 475       insn->necessaryPrefixLocation = insn->readerCursor - 2;
 476     }
 477
 478     if (insn->vectorExtensionType == TYPE_EVEX) {
 479       insn->vectorExtensionPrefix[0] = byte;
 480       insn->vectorExtensionPrefix[1] = byte1;
 481       if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
 482         dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
 483         return -1;
 484       }
 485       if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
 486         dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
 487         return -1;
 488       }
 489
 490       /* We simulate the REX prefix for simplicity's sake */
 491       if (insn->mode == MODE_64BIT) {
 492         insn->rexPrefix = 0x40
 493                         | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
 494                         | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
 495                         | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
 496                         | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
 497       }
 498
 499       dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
 500               insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
 501               insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
 502     }
 503   } else if (byte == 0xc4) {
 504     uint8_t byte1;
 505
 506     if (lookAtByte(insn, &byte1)) {
 507       dbgprintf(insn, "Couldn't read second byte of VEX");
 508       return -1;
 509     }
 510
 511     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
 512       insn->vectorExtensionType = TYPE_VEX_3B;
 513       insn->necessaryPrefixLocation = insn->readerCursor - 1;
 514     } else {
 515       unconsumeByte(insn);
 516       insn->necessaryPrefixLocation = insn->readerCursor - 1;
 517     }
 518
 519     if (insn->vectorExtensionType == TYPE_VEX_3B) {
 520       insn->vectorExtensionPrefix[0] = byte;
 521       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
 522       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
 523
 524       /* We simulate the REX prefix for simplicity's sake */
 525
 526       if (insn->mode == MODE_64BIT) {
 527         insn->rexPrefix = 0x40
 528                         | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
 529                         | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
 530                         | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
 531                         | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
 532       }
 533
 534       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
 535                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
 536                 insn->vectorExtensionPrefix[2]);
 537     }
 538   } else if (byte == 0xc5) {
 539     uint8_t byte1;
 540
 541     if (lookAtByte(insn, &byte1)) {
 542       dbgprintf(insn, "Couldn't read second byte of VEX");
 543       return -1;
 544     }
 545
 546     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
 547       insn->vectorExtensionType = TYPE_VEX_2B;
 548     } else {
 549       unconsumeByte(insn);
 550     }
 551
 552     if (insn->vectorExtensionType == TYPE_VEX_2B) {
 553       insn->vectorExtensionPrefix[0] = byte;
 554       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
 555
 556       if (insn->mode == MODE_64BIT) {
 557         insn->rexPrefix = 0x40
 558                         | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
 559       }
 560
 561       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
 562       default:
 563         break;
 564       case VEX_PREFIX_66:
 565         hasOpSize = true;
 566         break;
 567       }
 568
 569       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
 570                 insn->vectorExtensionPrefix[0],
 571                 insn->vectorExtensionPrefix[1]);
 572     }
 573   } else if (byte == 0x8f) {
 574     uint8_t byte1;
 575
 576     if (lookAtByte(insn, &byte1)) {
 577       dbgprintf(insn, "Couldn't read second byte of XOP");
 578       return -1;
 579     }
 580
 581     if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
 582       insn->vectorExtensionType = TYPE_XOP;
 583       insn->necessaryPrefixLocation = insn->readerCursor - 1;
 584     } else {
 585       unconsumeByte(insn);
 586       insn->necessaryPrefixLocation = insn->readerCursor - 1;
 587     }
 588
 589     if (insn->vectorExtensionType == TYPE_XOP) {
 590       insn->vectorExtensionPrefix[0] = byte;
 591       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
 592       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
 593
 594       /* We simulate the REX prefix for simplicity's sake */
 595
 596       if (insn->mode == MODE_64BIT) {
 597         insn->rexPrefix = 0x40
 598                         | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
 599                         | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
 600                         | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
 601                         | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
 602       }
 603
 604       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
 605       default:
 606         break;
 607       case VEX_PREFIX_66:
 608         hasOpSize = true;
 609         break;
 610       }
 611
 612       dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
 613                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
 614                 insn->vectorExtensionPrefix[2]);
 615     }
 616   } else {
 617     if (insn->mode == MODE_64BIT) {
 618       if ((byte & 0xf0) == 0x40) {
 619         uint8_t opcodeByte;
 620
 621         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
 622           dbgprintf(insn, "Redundant REX prefix");
 623           return -1;
 624         }
 625
 626         insn->rexPrefix = byte;
 627         insn->necessaryPrefixLocation = insn->readerCursor - 2;
 628
 629         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
 630       } else {
 631         unconsumeByte(insn);
 632         insn->necessaryPrefixLocation = insn->readerCursor - 1;
 633       }
 634     } else {
 635       unconsumeByte(insn);
 636       insn->necessaryPrefixLocation = insn->readerCursor - 1;
 637     }
 638   }
 639
 640   if (insn->mode == MODE_16BIT) {
 641     insn->registerSize       = (hasOpSize ? 4 : 2);
 642     insn->addressSize        = (hasAdSize ? 4 : 2);
 643     insn->displacementSize   = (hasAdSize ? 4 : 2);
 644     insn->immediateSize      = (hasOpSize ? 4 : 2);
 645   } else if (insn->mode == MODE_32BIT) {
 646     insn->registerSize       = (hasOpSize ? 2 : 4);
 647     insn->addressSize        = (hasAdSize ? 2 : 4);
 648     insn->displacementSize   = (hasAdSize ? 2 : 4);
 649     insn->immediateSize      = (hasOpSize ? 2 : 4);
 650   } else if (insn->mode == MODE_64BIT) {
 651     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
 652       insn->registerSize       = 8;
 653       insn->addressSize        = (hasAdSize ? 4 : 8);
 654       insn->displacementSize   = 4;
 655       insn->immediateSize      = 4;
 656     } else if (insn->rexPrefix) {
 657       insn->registerSize       = (hasOpSize ? 2 : 4);
 658       insn->addressSize        = (hasAdSize ? 4 : 8);
 659       insn->displacementSize   = (hasOpSize ? 2 : 4);
 660       insn->immediateSize      = (hasOpSize ? 2 : 4);
 661     } else {
 662       insn->registerSize       = (hasOpSize ? 2 : 4);
 663       insn->addressSize        = (hasAdSize ? 4 : 8);
 664       insn->displacementSize   = (hasOpSize ? 2 : 4);
 665       insn->immediateSize      = (hasOpSize ? 2 : 4);
 666     }
 667   }
 668
 669   return 0;
 670 }
 671
 672 /*
 673  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
 674  *   extended or escape opcodes).
 675  *
 676  * @param insn  - The instruction whose opcode is to be read.
 677  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
 678  */
 679 static int readOpcode(struct InternalInstruction* insn) {
 680   /* Determine the length of the primary opcode */
 681
 682   uint8_t current;
 683
 684   dbgprintf(insn, "readOpcode()");
 685
 686   insn->opcodeType = ONEBYTE;
 687
 688   if (insn->vectorExtensionType == TYPE_EVEX) {
 689     switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
 690     default:
 691       dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
 692                 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
 693       return -1;
 694     case VEX_LOB_0F:
 695       insn->opcodeType = TWOBYTE;
 696       return consumeByte(insn, &insn->opcode);
 697     case VEX_LOB_0F38:
 698       insn->opcodeType = THREEBYTE_38;
 699       return consumeByte(insn, &insn->opcode);
 700     case VEX_LOB_0F3A:
 701       insn->opcodeType = THREEBYTE_3A;
 702       return consumeByte(insn, &insn->opcode);
 703     }
 704   } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
 705     switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
 706     default:
 707       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
 708                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
 709       return -1;
 710     case VEX_LOB_0F:
 711       insn->opcodeType = TWOBYTE;
 712       return consumeByte(insn, &insn->opcode);
 713     case VEX_LOB_0F38:
 714       insn->opcodeType = THREEBYTE_38;
 715       return consumeByte(insn, &insn->opcode);
 716     case VEX_LOB_0F3A:
 717       insn->opcodeType = THREEBYTE_3A;
 718       return consumeByte(insn, &insn->opcode);
 719     }
 720   } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
 721     insn->opcodeType = TWOBYTE;
 722     return consumeByte(insn, &insn->opcode);
 723   } else if (insn->vectorExtensionType == TYPE_XOP) {
 724     switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
 725     default:
 726       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
 727                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
 728       return -1;
 729     case XOP_MAP_SELECT_8:
 730       insn->opcodeType = XOP8_MAP;
 731       return consumeByte(insn, &insn->opcode);
 732     case XOP_MAP_SELECT_9:
 733       insn->opcodeType = XOP9_MAP;
 734       return consumeByte(insn, &insn->opcode);
 735     case XOP_MAP_SELECT_A:
 736       insn->opcodeType = XOPA_MAP;
 737       return consumeByte(insn, &insn->opcode);
 738     }
 739   }
 740
 741   if (consumeByte(insn, &current))
 742     return -1;
 743
 744   if (current == 0x0f) {
 745     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
 746
 747     if (consumeByte(insn, &current))
 748       return -1;
 749
 750     if (current == 0x38) {
 751       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
 752
 753       if (consumeByte(insn, &current))
 754         return -1;
 755
 756       insn->opcodeType = THREEBYTE_38;
 757     } else if (current == 0x3a) {
 758       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
 759
 760       if (consumeByte(insn, &current))
 761         return -1;
 762
 763       insn->opcodeType = THREEBYTE_3A;
 764     } else {
 765       dbgprintf(insn, "Didn't find a three-byte escape prefix");
 766
 767       insn->opcodeType = TWOBYTE;
 768     }
 769   }
 770
 771   /*
 772    * At this point we have consumed the full opcode.
 773    * Anything we consume from here on must be unconsumed.
 774    */
 775
 776   insn->opcode = current;
 777
 778   return 0;
 779 }
 780
 781 static int readModRM(struct InternalInstruction* insn);
 782
 783 /*
 784  * getIDWithAttrMask - Determines the ID of an instruction, consuming
 785  *   the ModR/M byte as appropriate for extended and escape opcodes,
 786  *   and using a supplied attribute mask.
 787  *
 788  * @param instructionID - A pointer whose target is filled in with the ID of the
 789  *                        instruction.
 790  * @param insn          - The instruction whose ID is to be determined.
 791  * @param attrMask      - The attribute mask to search.
 792  * @return              - 0 if the ModR/M could be read when needed or was not
 793  *                        needed; nonzero otherwise.
 794  */
 795 static int getIDWithAttrMask(uint16_t* instructionID,
 796                              struct InternalInstruction* insn,
 797                              uint16_t attrMask) {
 798   bool hasModRMExtension;
 799
 800   InstructionContext instructionClass = contextForAttrs(attrMask);
 801
 802   hasModRMExtension = modRMRequired(insn->opcodeType,
 803                                     instructionClass,
 804                                     insn->opcode);
 805
 806   if (hasModRMExtension) {
 807     if (readModRM(insn))
 808       return -1;
 809
 810     *instructionID = decode(insn->opcodeType,
 811                             instructionClass,
 812                             insn->opcode,
 813                             insn->modRM);
 814   } else {
 815     *instructionID = decode(insn->opcodeType,
 816                             instructionClass,
 817                             insn->opcode,
 818                             0);
 819   }
 820
 821   return 0;
 822 }
 823
 /*
  * is16BitEquivalent - Determines whether two instruction names refer to
  * equivalent instructions but one is 16-bit whereas the other is not.
  *
  * The names are compared character by character.  Positions may differ only
  * in one of these ways (orig -> equiv): 'Q' or 'L' -> 'W', '6' or '3' -> '1',
  * '4' or '2' -> '6'.  Identical names also compare as equivalent.
  *
  * @param orig  - The instruction that is not 16-bit
  * @param equiv - The instruction that is 16-bit
  * @return      - true if both names have the same length and every position
  *                either matches or is one of the substitutions above.
  */
 static bool is16BitEquivalent(const char* orig, const char* equiv) {
   /* Walk both strings in lock-step until either one ends. */
   for (; *orig != '\0' && *equiv != '\0'; ++orig, ++equiv) {
     const char o = *orig;
     const char e = *equiv;

     if (o == e)
       continue;

     const bool sizeLetter  = (o == 'Q' || o == 'L') && e == 'W';
     const bool firstDigit  = (o == '6' || o == '3') && e == '1';
     const bool secondDigit = (o == '4' || o == '2') && e == '6';

     if (!(sizeLetter || firstDigit || secondDigit))
       return false;
   }

   /* Equivalent only if both names ended at the same position. */
   return *orig == '\0' && *equiv == '\0';
 }
 850
 /*
  * is64Bit - Determines whether this instruction is a 64-bit instruction,
  *   judged purely by whether its name contains the substring "64".
  *
  * @param name - The NUL-terminated name of the instruction to examine.
  * @return     - true if "64" occurs anywhere in the name; false otherwise.
  */
 static bool is64Bit(const char* name) {
   return std::strstr(name, "64") != nullptr;
 }
 866
 867 /*
 868  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 869  *   appropriate for extended and escape opcodes.  Determines the attributes and
 870  *   context for the instruction before doing so.
 871  *
 872  * @param insn  - The instruction whose ID is to be determined.
 873  * @return      - 0 if the ModR/M could be read when needed or was not needed;
 874  *                nonzero otherwise.
 875  */
 876 static int getID(struct InternalInstruction* insn, const void *miiArg) {
 877   uint16_t attrMask;
 878   uint16_t instructionID;
 879
 880   dbgprintf(insn, "getID()");
 881
 882   attrMask = ATTR_NONE;
 883
 884   if (insn->mode == MODE_64BIT)
 885     attrMask |= ATTR_64BIT;
 886
 887   if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
 888     attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
 889
 890     if (insn->vectorExtensionType == TYPE_EVEX) {
 891       switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
 892       case VEX_PREFIX_66:
 893         attrMask |= ATTR_OPSIZE;
 894         break;
 895       case VEX_PREFIX_F3:
 896         attrMask |= ATTR_XS;
 897         break;
 898       case VEX_PREFIX_F2:
 899         attrMask |= ATTR_XD;
 900         break;
 901       }
 902
 903       if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
 904         attrMask |= ATTR_EVEXKZ;
 905       if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
 906         attrMask |= ATTR_EVEXB;
 907       if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
 908         attrMask |= ATTR_EVEXK;
 909       if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
 910         attrMask |= ATTR_EVEXL;
 911       if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
 912         attrMask |= ATTR_EVEXL2;
 913     } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
 914       switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
 915       case VEX_PREFIX_66:
 916         attrMask |= ATTR_OPSIZE;
 917         break;
 918       case VEX_PREFIX_F3:
 919         attrMask |= ATTR_XS;
 920         break;
 921       case VEX_PREFIX_F2:
 922         attrMask |= ATTR_XD;
 923         break;
 924       }
 925
 926       if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
 927         attrMask |= ATTR_VEXL;
 928     } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
 929       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
 930       case VEX_PREFIX_66:
 931         attrMask |= ATTR_OPSIZE;
 932         break;
 933       case VEX_PREFIX_F3:
 934         attrMask |= ATTR_XS;
 935         break;
 936       case VEX_PREFIX_F2:
 937         attrMask |= ATTR_XD;
 938         break;
 939       }
 940
 941       if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
 942         attrMask |= ATTR_VEXL;
 943     } else if (insn->vectorExtensionType == TYPE_XOP) {
 944       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
 945       case VEX_PREFIX_66:
 946         attrMask |= ATTR_OPSIZE;
 947         break;
 948       case VEX_PREFIX_F3:
 949         attrMask |= ATTR_XS;
 950         break;
 951       case VEX_PREFIX_F2:
 952         attrMask |= ATTR_XD;
 953         break;
 954       }
 955
 956       if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
 957         attrMask |= ATTR_VEXL;
 958     } else {
 959       return -1;
 960     }
 961   } else {
 962     if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
 963       attrMask |= ATTR_OPSIZE;
 964     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
 965       attrMask |= ATTR_ADSIZE;
 966     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
 967       attrMask |= ATTR_XS;
 968     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
 969       attrMask |= ATTR_XD;
 970   }
 971
 972   if (insn->rexPrefix & 0x08)
 973     attrMask |= ATTR_REXW;
 974
 975   /*
 976    * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
 977    * of the AdSize prefix is inverted w.r.t. 32-bit mode.
 978    */
 979   if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
 980       insn->opcode == 0xE3)
 981     attrMask ^= ATTR_ADSIZE;
 982
 983   /*
 984    * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
 985    * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
 986    */
 987
 988   if (insn->mode == MODE_64BIT &&
 989       isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
 990     switch (insn->opcode) {
 991     case 0xE8:
 992     case 0xE9:
 993       // Take care of psubsb and other mmx instructions.
 994       if (insn->opcodeType == ONEBYTE) {
 995         attrMask ^= ATTR_OPSIZE;
 996         insn->immediateSize = 4;
 997         insn->displacementSize = 4;
 998       }
 999       break;
1000     case 0x82:
1001     case 0x83:
1002     case 0x84:
1003     case 0x85:
1004     case 0x86:
1005     case 0x87:
1006     case 0x88:
1007     case 0x89:
1008     case 0x8A:
1009     case 0x8B:
1010     case 0x8C:
1011     case 0x8D:
1012     case 0x8E:
1013     case 0x8F:
1014       // Take care of lea and three byte ops.
1015       if (insn->opcodeType == TWOBYTE) {
1016         attrMask ^= ATTR_OPSIZE;
1017         insn->immediateSize = 4;
1018         insn->displacementSize = 4;
1019       }
1020       break;
1021     }
1022   }
1023
1024   if (getIDWithAttrMask(&instructionID, insn, attrMask))
1025     return -1;
1026
1027   /* The following clauses compensate for limitations of the tables. */
1028
1029   if (insn->mode != MODE_64BIT &&
1030       insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1031     /*
1032      * The tables can't distinquish between cases where the W-bit is used to
1033      * select register size and cases where its a required part of the opcode.
1034      */
1035     if ((insn->vectorExtensionType == TYPE_EVEX &&
1036          wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1037         (insn->vectorExtensionType == TYPE_VEX_3B &&
1038          wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1039         (insn->vectorExtensionType == TYPE_XOP &&
1040          wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1041
1042       uint16_t instructionIDWithREXW;
1043       if (getIDWithAttrMask(&instructionIDWithREXW,
1044                             insn, attrMask | ATTR_REXW)) {
1045         insn->instructionID = instructionID;
1046         insn->spec = specifierForUID(instructionID);
1047         return 0;
1048       }
1049
1050       const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg);
1051       // If not a 64-bit instruction. Switch the opcode.
1052       if (!is64Bit(SpecName)) {
1053         insn->instructionID = instructionIDWithREXW;
1054         insn->spec = specifierForUID(instructionIDWithREXW);
1055         return 0;
1056       }
1057     }
1058   }
1059
1060   /*
1061    * Absolute moves need special handling.
1062    * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1063    *  inverted w.r.t.
1064    * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1065    *  any position.
1066    */
1067   if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
1068     /* Make sure we observed the prefixes in any position. */
1069     if (insn->prefixPresent[0x67])
1070       attrMask |= ATTR_ADSIZE;
1071     if (insn->prefixPresent[0x66])
1072       attrMask |= ATTR_OPSIZE;
1073
1074     /* In 16-bit, invert the attributes. */
1075     if (insn->mode == MODE_16BIT)
1076       attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
1077
1078     if (getIDWithAttrMask(&instructionID, insn, attrMask))
1079       return -1;
1080
1081     insn->instructionID = instructionID;
1082     insn->spec = specifierForUID(instructionID);
1083     return 0;
1084   }
1085
1086   if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
1087       !(attrMask & ATTR_OPSIZE)) {
1088     /*
1089      * The instruction tables make no distinction between instructions that
1090      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1091      * particular spot (i.e., many MMX operations).  In general we're
1092      * conservative, but in the specific case where OpSize is present but not
1093      * in the right place we check if there's a 16-bit operation.
1094      */
1095
1096     const struct InstructionSpecifier *spec;
1097     uint16_t instructionIDWithOpsize;
1098     const char *specName, *specWithOpSizeName;
1099
1100     spec = specifierForUID(instructionID);
1101
1102     if (getIDWithAttrMask(&instructionIDWithOpsize,
1103                           insn,
1104                           attrMask | ATTR_OPSIZE)) {
1105       /*
1106        * ModRM required with OpSize but not present; give up and return version
1107        * without OpSize set
1108        */
1109
1110       insn->instructionID = instructionID;
1111       insn->spec = spec;
1112       return 0;
1113     }
1114
1115     specName = GetInstrName(instructionID, miiArg);
1116     specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
1117
1118     if (is16BitEquivalent(specName, specWithOpSizeName) &&
1119         (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
1120       insn->instructionID = instructionIDWithOpsize;
1121       insn->spec = specifierForUID(instructionIDWithOpsize);
1122     } else {
1123       insn->instructionID = instructionID;
1124       insn->spec = spec;
1125     }
1126     return 0;
1127   }
1128
1129   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1130       insn->rexPrefix & 0x01) {
1131     /*
1132      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1133      * it should decode as XCHG %r8, %eax.
1134      */
1135
1136     const struct InstructionSpecifier *spec;
1137     uint16_t instructionIDWithNewOpcode;
1138     const struct InstructionSpecifier *specWithNewOpcode;
1139
1140     spec = specifierForUID(instructionID);
1141
1142     /* Borrow opcode from one of the other XCHGar opcodes */
1143     insn->opcode = 0x91;
1144
1145     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1146                           insn,
1147                           attrMask)) {
1148       insn->opcode = 0x90;
1149
1150       insn->instructionID = instructionID;
1151       insn->spec = spec;
1152       return 0;
1153     }
1154
1155     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1156
1157     /* Change back */
1158     insn->opcode = 0x90;
1159
1160     insn->instructionID = instructionIDWithNewOpcode;
1161     insn->spec = specWithNewOpcode;
1162
1163     return 0;
1164   }
1165
1166   insn->instructionID = instructionID;
1167   insn->spec = specifierForUID(insn->instructionID);
1168
1169   return 0;
1170 }
1171
1172 /*
1173  * readSIB - Consumes the SIB byte to determine addressing information for an
1174  *   instruction.
1175  *
1176  * @param insn  - The instruction whose SIB byte is to be read.
1177  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
1178  */
1179 static int readSIB(struct InternalInstruction* insn) {
1180   SIBIndex sibIndexBase = SIB_INDEX_NONE;
1181   SIBBase sibBaseBase = SIB_BASE_NONE;
1182   uint8_t index, base;
1183
1184   dbgprintf(insn, "readSIB()");
1185
1186   if (insn->consumedSIB)
1187     return 0;
1188
1189   insn->consumedSIB = true;
1190
1191   switch (insn->addressSize) {
1192   case 2:
1193     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1194     return -1;
1195   case 4:
1196     sibIndexBase = SIB_INDEX_EAX;
1197     sibBaseBase = SIB_BASE_EAX;
1198     break;
1199   case 8:
1200     sibIndexBase = SIB_INDEX_RAX;
1201     sibBaseBase = SIB_BASE_RAX;
1202     break;
1203   }
1204
1205   if (consumeByte(insn, &insn->sib))
1206     return -1;
1207
1208   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1209
1210   // FIXME: The fifth bit (bit index 4) is only to be used for instructions
1211   // that understand VSIB indexing. ORing the bit in here is mildy dangerous
1212   // because performing math on an 'enum SIBIndex' can produce garbage.
1213   // Excluding the "none" value, it should cover 6 spaces of register names:
1214   //   - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI
1215   //   - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX
1216   //   - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX
1217   //   - 32 possibilities for each of XMM, YMM, ZMM registers
1218   // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode,
1219   // summing in a fully decoded index between 0 and 31 can end up with a value
1220   // that looks like something in the low half of the XMM range.
1221   // translateRMMemory() tries to reverse the damage, with only partial success,
1222   // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
1223   if (insn->vectorExtensionType == TYPE_EVEX)
1224     index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1225
1226   if (index == 0x4) {
1227     insn->sibIndex = SIB_INDEX_NONE;
1228   } else {
1229     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1230   }
1231
1232   insn->sibScale = 1 << scaleFromSIB(insn->sib);
1233
1234   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1235
1236   switch (base) {
1237   case 0x5:
1238   case 0xd:
1239     switch (modFromModRM(insn->modRM)) {
1240     case 0x0:
1241       insn->eaDisplacement = EA_DISP_32;
1242       insn->sibBase = SIB_BASE_NONE;
1243       break;
1244     case 0x1:
1245       insn->eaDisplacement = EA_DISP_8;
1246       insn->sibBase = (SIBBase)(sibBaseBase + base);
1247       break;
1248     case 0x2:
1249       insn->eaDisplacement = EA_DISP_32;
1250       insn->sibBase = (SIBBase)(sibBaseBase + base);
1251       break;
1252     case 0x3:
1253       debug("Cannot have Mod = 0b11 and a SIB byte");
1254       return -1;
1255     }
1256     break;
1257   default:
1258     insn->sibBase = (SIBBase)(sibBaseBase + base);
1259     break;
1260   }
1261
1262   return 0;
1263 }
1264
1265 /*
1266  * readDisplacement - Consumes the displacement of an instruction.
1267  *
1268  * @param insn  - The instruction whose displacement is to be read.
1269  * @return      - 0 if the displacement byte was successfully read; nonzero
1270  *                otherwise.
1271  */
1272 static int readDisplacement(struct InternalInstruction* insn) {
1273   int8_t d8;
1274   int16_t d16;
1275   int32_t d32;
1276
1277   dbgprintf(insn, "readDisplacement()");
1278
1279   if (insn->consumedDisplacement)
1280     return 0;
1281
1282   insn->consumedDisplacement = true;
1283   insn->displacementOffset = insn->readerCursor - insn->startLocation;
1284
1285   switch (insn->eaDisplacement) {
1286   case EA_DISP_NONE:
1287     insn->consumedDisplacement = false;
1288     break;
1289   case EA_DISP_8:
1290     if (consumeInt8(insn, &d8))
1291       return -1;
1292     insn->displacement = d8;
1293     break;
1294   case EA_DISP_16:
1295     if (consumeInt16(insn, &d16))
1296       return -1;
1297     insn->displacement = d16;
1298     break;
1299   case EA_DISP_32:
1300     if (consumeInt32(insn, &d32))
1301       return -1;
1302     insn->displacement = d32;
1303     break;
1304   }
1305
1306   insn->consumedDisplacement = true;
1307   return 0;
1308 }
1309
1310 /*
1311  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1312  *   displacement) for an instruction and interprets it.
1313  *
1314  * @param insn  - The instruction whose addressing information is to be read.
1315  * @return      - 0 if the information was successfully read; nonzero otherwise.
1316  */
1317 static int readModRM(struct InternalInstruction* insn) {
1318   uint8_t mod, rm, reg;
1319
1320   dbgprintf(insn, "readModRM()");
1321
1322   if (insn->consumedModRM)
1323     return 0;
1324
1325   if (consumeByte(insn, &insn->modRM))
1326     return -1;
1327   insn->consumedModRM = true;
1328
1329   mod     = modFromModRM(insn->modRM);
1330   rm      = rmFromModRM(insn->modRM);
1331   reg     = regFromModRM(insn->modRM);
1332
1333   /*
1334    * This goes by insn->registerSize to pick the correct register, which messes
1335    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
1336    * fixupReg().
1337    */
1338   switch (insn->registerSize) {
1339   case 2:
1340     insn->regBase = MODRM_REG_AX;
1341     insn->eaRegBase = EA_REG_AX;
1342     break;
1343   case 4:
1344     insn->regBase = MODRM_REG_EAX;
1345     insn->eaRegBase = EA_REG_EAX;
1346     break;
1347   case 8:
1348     insn->regBase = MODRM_REG_RAX;
1349     insn->eaRegBase = EA_REG_RAX;
1350     break;
1351   }
1352
1353   reg |= rFromREX(insn->rexPrefix) << 3;
1354   rm  |= bFromREX(insn->rexPrefix) << 3;
1355   if (insn->vectorExtensionType == TYPE_EVEX) {
1356     reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1357     rm  |=  xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1358   }
1359
1360   insn->reg = (Reg)(insn->regBase + reg);
1361
1362   switch (insn->addressSize) {
1363   case 2:
1364     insn->eaBaseBase = EA_BASE_BX_SI;
1365
1366     switch (mod) {
1367     case 0x0:
1368       if (rm == 0x6) {
1369         insn->eaBase = EA_BASE_NONE;
1370         insn->eaDisplacement = EA_DISP_16;
1371         if (readDisplacement(insn))
1372           return -1;
1373       } else {
1374         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1375         insn->eaDisplacement = EA_DISP_NONE;
1376       }
1377       break;
1378     case 0x1:
1379       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1380       insn->eaDisplacement = EA_DISP_8;
1381       insn->displacementSize = 1;
1382       if (readDisplacement(insn))
1383         return -1;
1384       break;
1385     case 0x2:
1386       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1387       insn->eaDisplacement = EA_DISP_16;
1388       if (readDisplacement(insn))
1389         return -1;
1390       break;
1391     case 0x3:
1392       insn->eaBase = (EABase)(insn->eaRegBase + rm);
1393       if (readDisplacement(insn))
1394         return -1;
1395       break;
1396     }
1397     break;
1398   case 4:
1399   case 8:
1400     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1401
1402     switch (mod) {
1403     case 0x0:
1404       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1405       // In determining whether RIP-relative mode is used (rm=5),
1406       // or whether a SIB byte is present (rm=4),
1407       // the extension bits (REX.b and EVEX.x) are ignored.
1408       switch (rm & 7) {
1409       case 0x4: // SIB byte is present
1410         insn->eaBase = (insn->addressSize == 4 ?
1411                         EA_BASE_sib : EA_BASE_sib64);
1412         if (readSIB(insn) || readDisplacement(insn))
1413           return -1;
1414         break;
1415       case 0x5: // RIP-relative
1416         insn->eaBase = EA_BASE_NONE;
1417         insn->eaDisplacement = EA_DISP_32;
1418         if (readDisplacement(insn))
1419           return -1;
1420         break;
1421       default:
1422         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1423         break;
1424       }
1425       break;
1426     case 0x1:
1427       insn->displacementSize = 1;
1428       /* FALLTHROUGH */
1429     case 0x2:
1430       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1431       switch (rm & 7) {
1432       case 0x4: // SIB byte is present
1433         insn->eaBase = EA_BASE_sib;
1434         if (readSIB(insn) || readDisplacement(insn))
1435           return -1;
1436         break;
1437       default:
1438         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1439         if (readDisplacement(insn))
1440           return -1;
1441         break;
1442       }
1443       break;
1444     case 0x3:
1445       insn->eaDisplacement = EA_DISP_NONE;
1446       insn->eaBase = (EABase)(insn->eaRegBase + rm);
1447       break;
1448     }
1449     break;
1450   } /* switch (insn->addressSize) */
1451
1452   return 0;
1453 }
1454
1455 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
1456   static uint8_t name(struct InternalInstruction *insn,   \
1457                       OperandType type,                   \
1458                       uint8_t index,                      \
1459                       uint8_t *valid) {                   \
1460     *valid = 1;                                           \
1461     switch (type) {                                       \
1462     default:                                              \
1463       debug("Unhandled register type");                   \
1464       *valid = 0;                                         \
1465       return 0;                                           \
1466     case TYPE_Rv:                                         \
1467       return base + index;                                \
1468     case TYPE_R8:                                         \
1469       if (insn->rexPrefix &&                              \
1470          index >= 4 && index <= 7) {                      \
1471         return prefix##_SPL + (index - 4);                \
1472       } else {                                            \
1473         return prefix##_AL + index;                       \
1474       }                                                   \
1475     case TYPE_R16:                                        \
1476       return prefix##_AX + index;                         \
1477     case TYPE_R32:                                        \
1478       return prefix##_EAX + index;                        \
1479     case TYPE_R64:                                        \
1480       return prefix##_RAX + index;                        \
1481     case TYPE_XMM512:                                     \
1482       return prefix##_ZMM0 + index;                       \
1483     case TYPE_XMM256:                                     \
1484       return prefix##_YMM0 + index;                       \
1485     case TYPE_XMM128:                                     \
1486     case TYPE_XMM64:                                      \
1487     case TYPE_XMM32:                                      \
1488     case TYPE_XMM:                                        \
1489       return prefix##_XMM0 + index;                       \
1490     case TYPE_VK1:                                        \
1491     case TYPE_VK8:                                        \
1492     case TYPE_VK16:                                       \
1493       if (index > 7)                                      \
1494         *valid = 0;                                       \
1495       return prefix##_K0 + index;                         \
1496     case TYPE_MM64:                                       \
1497       return prefix##_MM0 + (index & 0x7);                \
1498     case TYPE_SEGMENTREG:                                 \
1499       if (index > 5)                                      \
1500         *valid = 0;                                       \
1501       return prefix##_ES + index;                         \
1502     case TYPE_DEBUGREG:                                   \
1503       return prefix##_DR0 + index;                        \
1504     case TYPE_CONTROLREG:                                 \
1505       return prefix##_CR0 + index;                        \
1506     }                                                     \
1507   }
1508
1509 /*
1510  * fixup*Value - Consults an operand type to determine the meaning of the
1511  *   reg or R/M field.  If the operand is an XMM operand, for example, an
1512  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
1513  *   misinterpret it as.
1514  *
1515  * @param insn  - The instruction containing the operand.
1516  * @param type  - The operand type.
1517  * @param index - The existing value of the field as reported by readModRM().
1518  * @param valid - The address of a uint8_t.  The target is set to 1 if the
1519  *                field is valid for the register class; 0 if not.
1520  * @return      - The proper value.
1521  */
1522 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
1523 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
1524
1525 /*
1526  * fixupReg - Consults an operand specifier to determine which of the
1527  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
1528  *
1529  * @param insn  - See fixup*Value().
1530  * @param op    - The operand specifier.
1531  * @return      - 0 if fixup was successful; -1 if the register returned was
1532  *                invalid for its class.
1533  */
1534 static int fixupReg(struct InternalInstruction *insn,
1535                     const struct OperandSpecifier *op) {
1536   uint8_t valid;
1537
1538   dbgprintf(insn, "fixupReg()");
1539
1540   switch ((OperandEncoding)op->encoding) {
1541   default:
1542     debug("Expected a REG or R/M encoding in fixupReg");
1543     return -1;
1544   case ENCODING_VVVV:
1545     insn->vvvv = (Reg)fixupRegValue(insn,
1546                                     (OperandType)op->type,
1547                                     insn->vvvv,
1548                                     &valid);
1549     if (!valid)
1550       return -1;
1551     break;
1552   case ENCODING_REG:
1553     insn->reg = (Reg)fixupRegValue(insn,
1554                                    (OperandType)op->type,
1555                                    insn->reg - insn->regBase,
1556                                    &valid);
1557     if (!valid)
1558       return -1;
1559     break;
1560   CASE_ENCODING_RM:
1561     if (insn->eaBase >= insn->eaRegBase) {
1562       insn->eaBase = (EABase)fixupRMValue(insn,
1563                                           (OperandType)op->type,
1564                                           insn->eaBase - insn->eaRegBase,
1565                                           &valid);
1566       if (!valid)
1567         return -1;
1568     }
1569     break;
1570   }
1571
1572   return 0;
1573 }
1574
1575 /*
1576  * readOpcodeRegister - Reads an operand from the opcode field of an
1577  *   instruction and interprets it appropriately given the operand width.
1578  *   Handles AddRegFrm instructions.
1579  *
1580  * @param insn  - the instruction whose opcode field is to be read.
1581  * @param size  - The width (in bytes) of the register being specified.
1582  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1583  *                RAX.
1584  * @return      - 0 on success; nonzero otherwise.
1585  */
1586 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1587   dbgprintf(insn, "readOpcodeRegister()");
1588
1589   if (size == 0)
1590     size = insn->registerSize;
1591
1592   switch (size) {
1593   case 1:
1594     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1595                                                   | (insn->opcode & 7)));
1596     if (insn->rexPrefix &&
1597         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1598         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1599       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1600                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
1601     }
1602
1603     break;
1604   case 2:
1605     insn->opcodeRegister = (Reg)(MODRM_REG_AX
1606                                  + ((bFromREX(insn->rexPrefix) << 3)
1607                                     | (insn->opcode & 7)));
1608     break;
1609   case 4:
1610     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1611                                  + ((bFromREX(insn->rexPrefix) << 3)
1612                                     | (insn->opcode & 7)));
1613     break;
1614   case 8:
1615     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1616                                  + ((bFromREX(insn->rexPrefix) << 3)
1617                                     | (insn->opcode & 7)));
1618     break;
1619   }
1620
1621   return 0;
1622 }
1623
1624 /*
1625  * readImmediate - Consumes an immediate operand from an instruction, given the
1626  *   desired operand size.
1627  *
1628  * @param insn  - The instruction whose operand is to be read.
1629  * @param size  - The width (in bytes) of the operand.
1630  * @return      - 0 if the immediate was successfully consumed; nonzero
1631  *                otherwise.
1632  */
1633 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1634   uint8_t imm8;
1635   uint16_t imm16;
1636   uint32_t imm32;
1637   uint64_t imm64;
1638
1639   dbgprintf(insn, "readImmediate()");
1640
1641   if (insn->numImmediatesConsumed == 2) {
1642     debug("Already consumed two immediates");
1643     return -1;
1644   }
1645
1646   if (size == 0)
1647     size = insn->immediateSize;
1648   else
1649     insn->immediateSize = size;
1650   insn->immediateOffset = insn->readerCursor - insn->startLocation;
1651
1652   switch (size) {
1653   case 1:
1654     if (consumeByte(insn, &imm8))
1655       return -1;
1656     insn->immediates[insn->numImmediatesConsumed] = imm8;
1657     break;
1658   case 2:
1659     if (consumeUInt16(insn, &imm16))
1660       return -1;
1661     insn->immediates[insn->numImmediatesConsumed] = imm16;
1662     break;
1663   case 4:
1664     if (consumeUInt32(insn, &imm32))
1665       return -1;
1666     insn->immediates[insn->numImmediatesConsumed] = imm32;
1667     break;
1668   case 8:
1669     if (consumeUInt64(insn, &imm64))
1670       return -1;
1671     insn->immediates[insn->numImmediatesConsumed] = imm64;
1672     break;
1673   }
1674
1675   insn->numImmediatesConsumed++;
1676
1677   return 0;
1678 }
1679
1680 /*
1681  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1682  *
1683  * @param insn  - The instruction whose operand is to be read.
1684  * @return      - 0 if the vvvv was successfully consumed; nonzero
1685  *                otherwise.
1686  */
1687 static int readVVVV(struct InternalInstruction* insn) {
1688   dbgprintf(insn, "readVVVV()");
1689
1690   int vvvv;
1691   if (insn->vectorExtensionType == TYPE_EVEX)
1692     vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1693             vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1694   else if (insn->vectorExtensionType == TYPE_VEX_3B)
1695     vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1696   else if (insn->vectorExtensionType == TYPE_VEX_2B)
1697     vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1698   else if (insn->vectorExtensionType == TYPE_XOP)
1699     vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1700   else
1701     return -1;
1702
1703   if (insn->mode != MODE_64BIT)
1704     vvvv &= 0x7;
1705
1706   insn->vvvv = static_cast<Reg>(vvvv);
1707   return 0;
1708 }
1709
1710 /*
1711  * readMaskRegister - Reads an mask register from the opcode field of an
1712  *   instruction.
1713  *
1714  * @param insn    - The instruction whose opcode field is to be read.
1715  * @return        - 0 on success; nonzero otherwise.
1716  */
1717 static int readMaskRegister(struct InternalInstruction* insn) {
1718   dbgprintf(insn, "readMaskRegister()");
1719
1720   if (insn->vectorExtensionType != TYPE_EVEX)
1721     return -1;
1722
1723   insn->writemask =
1724       static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1725   return 0;
1726 }
1727
1728 /*
1729  * readOperands - Consults the specifier for an instruction and consumes all
1730  *   operands for that instruction, interpreting them as it goes.
1731  *
1732  * @param insn  - The instruction whose operands are to be read and interpreted.
1733  * @return      - 0 if all operands could be read; nonzero otherwise.
1734  */
1735 static int readOperands(struct InternalInstruction* insn) {
1736   int hasVVVV, needVVVV;
1737   int sawRegImm = 0;
1738
1739   dbgprintf(insn, "readOperands()");
1740
1741   /* If non-zero vvvv specified, need to make sure one of the operands
1742      uses it. */
1743   hasVVVV = !readVVVV(insn);
1744   needVVVV = hasVVVV && (insn->vvvv != 0);
1745
1746   for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1747     switch (Op.encoding) {
1748     case ENCODING_NONE:
1749     case ENCODING_SI:
1750     case ENCODING_DI:
1751       break;
1752     case ENCODING_REG:
1753     CASE_ENCODING_RM:
1754       if (readModRM(insn))
1755         return -1;
1756       if (fixupReg(insn, &Op))
1757         return -1;
1758       // Apply the AVX512 compressed displacement scaling factor.
1759       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1760         insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1761       break;
1762     case ENCODING_CB:
1763     case ENCODING_CW:
1764     case ENCODING_CD:
1765     case ENCODING_CP:
1766     case ENCODING_CO:
1767     case ENCODING_CT:
1768       dbgprintf(insn, "We currently don't hande code-offset encodings");
1769       return -1;
1770     case ENCODING_IB:
1771       if (sawRegImm) {
1772         /* Saw a register immediate so don't read again and instead split the
1773            previous immediate.  FIXME: This is a hack. */
1774         insn->immediates[insn->numImmediatesConsumed] =
1775           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1776         ++insn->numImmediatesConsumed;
1777         break;
1778       }
1779       if (readImmediate(insn, 1))
1780         return -1;
1781       if (Op.type == TYPE_XMM128 ||
1782           Op.type == TYPE_XMM256)
1783         sawRegImm = 1;
1784       break;
1785     case ENCODING_IW:
1786       if (readImmediate(insn, 2))
1787         return -1;
1788       break;
1789     case ENCODING_ID:
1790       if (readImmediate(insn, 4))
1791         return -1;
1792       break;
1793     case ENCODING_IO:
1794       if (readImmediate(insn, 8))
1795         return -1;
1796       break;
1797     case ENCODING_Iv:
1798       if (readImmediate(insn, insn->immediateSize))
1799         return -1;
1800       break;
1801     case ENCODING_Ia:
1802       if (readImmediate(insn, insn->addressSize))
1803         return -1;
1804       break;
1805     case ENCODING_RB:
1806       if (readOpcodeRegister(insn, 1))
1807         return -1;
1808       break;
1809     case ENCODING_RW:
1810       if (readOpcodeRegister(insn, 2))
1811         return -1;
1812       break;
1813     case ENCODING_RD:
1814       if (readOpcodeRegister(insn, 4))
1815         return -1;
1816       break;
1817     case ENCODING_RO:
1818       if (readOpcodeRegister(insn, 8))
1819         return -1;
1820       break;
1821     case ENCODING_Rv:
1822       if (readOpcodeRegister(insn, 0))
1823         return -1;
1824       break;
1825     case ENCODING_FP:
1826       break;
1827     case ENCODING_VVVV:
1828       needVVVV = 0; /* Mark that we have found a VVVV operand. */
1829       if (!hasVVVV)
1830         return -1;
1831       if (fixupReg(insn, &Op))
1832         return -1;
1833       break;
1834     case ENCODING_WRITEMASK:
1835       if (readMaskRegister(insn))
1836         return -1;
1837       break;
1838     case ENCODING_DUP:
1839       break;
1840     default:
1841       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1842       return -1;
1843     }
1844   }
1845
1846   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1847   if (needVVVV) return -1;
1848
1849   return 0;
1850 }
1851
1852 /*
1853  * decodeInstruction - Reads and interprets a full instruction provided by the
1854  *   user.
1855  *
1856  * @param insn      - A pointer to the instruction to be populated.  Must be
1857  *                    pre-allocated.
1858  * @param reader    - The function to be used to read the instruction's bytes.
1859  * @param readerArg - A generic argument to be passed to the reader to store
1860  *                    any internal state.
1861  * @param logger    - If non-NULL, the function to be used to write log messages
1862  *                    and warnings.
1863  * @param loggerArg - A generic argument to be passed to the logger to store
1864  *                    any internal state.
1865  * @param startLoc  - The address (in the reader's address space) of the first
1866  *                    byte in the instruction.
1867  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1868  *                    decode the instruction in.
1869  * @return          - 0 if the instruction's memory could be read; nonzero if
1870  *                    not.
1871  */
1872 int llvm::X86Disassembler::decodeInstruction(
1873     struct InternalInstruction *insn, byteReader_t reader,
1874     const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
1875     uint64_t startLoc, DisassemblerMode mode) {
1876   memset(insn, 0, sizeof(struct InternalInstruction));
1877
1878   insn->reader = reader;
1879   insn->readerArg = readerArg;
1880   insn->dlog = logger;
1881   insn->dlogArg = loggerArg;
1882   insn->startLocation = startLoc;
1883   insn->readerCursor = startLoc;
1884   insn->mode = mode;
1885   insn->numImmediatesConsumed = 0;
1886
1887   if (readPrefixes(insn)       ||
1888       readOpcode(insn)         ||
1889       getID(insn, miiArg)      ||
1890       insn->instructionID == 0 ||
1891       readOperands(insn))
1892     return -1;
1893
1894   insn->operands = x86OperandSets[insn->spec->operands];
1895
1896   insn->length = insn->readerCursor - insn->startLocation;
1897
1898   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1899             startLoc, insn->readerCursor, insn->length);
1900
1901   if (insn->length > 15)
1902     dbgprintf(insn, "Instruction exceeds 15-byte limit");
1903
1904   return 0;
1905 }