1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
3 * The LLVM Compiler Infrastructure
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===*
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *===----------------------------------------------------------------------===*/
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
21 #include "X86DisassemblerDecoder.h"
23 #include "X86GenDisassemblerTables.inc"
/* Two alternative definitions of debug(s). The first forwards the message,
 * together with __FILE__ and __LINE__, to x86DisassemblerDebug(); the second
 * expands to an empty statement. Both use the do { } while (0) wrapper so the
 * macro behaves as a single statement.
 * NOTE(review): the preprocessor conditional that selects between the two is
 * not visible in this listing. */
31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
33 #define debug(s) do { } while (0)
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * instruction with these attributes.
/* Plain table lookup: attrMask is used directly as the index into
 * CONTEXTS_SYM. No range check is performed here. */
45 static InstructionContext contextForAttrs(uint8_t attrMask) {
46 return CONTEXTS_SYM[attrMask];
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
/* Picks one of the four context-decision tables (ONEBYTE_SYM, TWOBYTE_SYM,
 * THREEBYTE38_SYM, THREEBYTE3A_SYM) and inspects the entry for
 * (insnContext, opcode).
 * NOTE(review): the statement that selects the table (presumably a switch on
 * `type`) is not visible in this listing. */
60 static int modRMRequired(OpcodeType type,
61 InstructionContext insnContext,
/* Starts out null; one of the assignments below is expected to replace it. */
63 const struct ContextDecision* decision = 0;
67 decision = &ONEBYTE_SYM;
70 decision = &TWOBYTE_SYM;
73 decision = &THREEBYTE38_SYM;
76 decision = &THREEBYTE3A_SYM;
/* A ModR/M byte is required for every modrm_type except MODRM_ONEENTRY. */
80 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
81 modrm_type != MODRM_ONEENTRY;
87 * decode - Reads the appropriate instruction table to obtain the unique ID of
90 * @param type - See modRMRequired().
91 * @param insnContext - See modRMRequired().
92 * @param opcode - See modRMRequired().
93 * @param modRM - The ModR/M byte if required, or any value if not.
94 * @return - The UID of the instruction, or 0 on failure.
/* Resolves (type, insnContext, opcode, modRM) to an instruction UID by first
 * locating the ModRMDecision entry in the table for the opcode type and then
 * indexing its instructionIDs array according to the entry's modrm_type. */
96 static InstrUID decode(OpcodeType type,
97 InstructionContext insnContext,
100 const struct ModRMDecision* dec;
/* Reported when the opcode type matches none of the four known tables. */
104 debug("Unknown opcode type");
/* Each assignment below points dec at the entry for (insnContext, opcode) in
 * one of the four opcode tables. */
107 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
110 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
113 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
116 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
/* NOTE(review): the case labels of this switch are not visible in this
 * listing; the three visible outcomes are described below. */
120 switch (dec->modrm_type) {
122 debug("Corrupt table! Unknown modrm_type");
/* Single-entry form: the ModR/M byte does not influence the result. */
125 return dec->instructionIDs[0];
/* Two-entry form: slot 1 is used when the mod field is 0x3; the following
 * return of slot 0 is presumably the alternative (non-0x3) path. */
127 if (modFromModRM(modRM) == 0x3)
128 return dec->instructionIDs[1];
130 return dec->instructionIDs[0];
/* Fully-indexed form: the whole ModR/M byte selects the slot. */
132 return dec->instructionIDs[modRM];
137 * specifierForUID - Given a UID, returns the name and operand specification for
140 * @param uid - The unique ID for the instruction. This should be returned by
141 * decode(); specifierForUID will not check bounds.
142 * @return - A pointer to the specification for that instruction.
/* Returns the address of entry `uid` in INSTRUCTIONS_SYM. The index is not
 * validated, so callers must pass a UID that is in range for the table. */
144 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
145 return &INSTRUCTIONS_SYM[uid];
149 * consumeByte - Uses the reader function provided by the user to consume one
150 * byte from the instruction's memory and advance the cursor.
152 * @param insn - The instruction with the reader function to use. The cursor
153 * for this instruction is advanced.
154 * @param byte - A pointer to a pre-allocated memory buffer to be populated
155 * with the data read.
156 * @return - 0 if the read was successful; nonzero otherwise.
/* Reads one byte through the user-supplied reader callback at the current
 * cursor position and moves the cursor forward by one. */
158 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
/* The reader receives its opaque argument, the destination, and the address
 * to read from; its result is kept in ret. */
159 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
/* NOTE(review): the condition guarding this increment (presumably "read
 * succeeded") and the final return are not visible in this listing. */
162 ++(insn->readerCursor);
168 * lookAtByte - Like consumeByte, but does not advance the cursor.
170 * @param insn - See consumeByte().
171 * @param byte - See consumeByte().
172 * @return - See consumeByte().
/* Peeks at the byte under the cursor: same reader call as consumeByte(), but
 * readerCursor is left untouched. The reader's return value is passed
 * straight back to the caller. */
174 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
175 return insn->reader(insn->readerArg, byte, insn->readerCursor);
/* Steps the reader cursor back by one byte. No check is made against the
 * instruction's start location. */
178 static void unconsumeByte(struct InternalInstruction* insn) {
179 insn->readerCursor--;
/* CONSUME_FUNC(name, type) generates a static reader function `name` that
 * assembles a value of `type` from sizeof(type) consecutive bytes. Each byte
 * is fetched through insn->reader at readerCursor + offset, shifted left by
 * offset * 8 bits and OR-ed into the accumulator, so the first byte read ends
 * up least significant (little-endian input). Afterwards the cursor is
 * advanced by sizeof(type).
 * NOTE(review): the declarations, the error check on `ret`, the store through
 * `ptr` and the return statement are not visible in this listing. */
182 #define CONSUME_FUNC(name, type) \
183 static int name(struct InternalInstruction* insn, type* ptr) { \
186 for (offset = 0; offset < sizeof(type); ++offset) { \
188 int ret = insn->reader(insn->readerArg, \
190 insn->readerCursor + offset); \
193 combined = combined | ((type)byte << ((type)offset * 8)); \
196 insn->readerCursor += sizeof(type); \
201 * consume* - Use the reader function provided by the user to consume data
202 * values of various sizes from the instruction's memory and advance the
203 * cursor appropriately. These readers perform endian conversion.
205 * @param insn - See consumeByte().
206 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
207 * be populated with the data read.
208 * @return - See consumeByte().
/* Instantiate the multi-byte readers: signed 8-, 16- and 32-bit variants,
 * followed by unsigned 16-, 32- and 64-bit variants. */
210 CONSUME_FUNC(consumeInt8, int8_t)
211 CONSUME_FUNC(consumeInt16, int16_t)
212 CONSUME_FUNC(consumeInt32, int32_t)
213 CONSUME_FUNC(consumeUInt16, uint16_t)
214 CONSUME_FUNC(consumeUInt32, uint32_t)
215 CONSUME_FUNC(consumeUInt64, uint64_t)
218 * dbgprintf - Uses the logging function provided by the user to log a single
219 * message, typically without a carriage-return.
221 * @param insn - The instruction containing the logging function.
222 * @param format - See printf().
223 * @param ... - See printf().
/* printf-style logging helper: formats the message into a local buffer and
 * hands the resulting string to the instruction's logging callback.
 * NOTE(review): the remaining parameters, the buffer declaration and any
 * guard for a missing logger (decodeInstruction documents the logger as
 * optional) are not visible in this listing. */
225 static void dbgprintf(struct InternalInstruction* insn,
234 va_start(ap, format);
/* Output is bounded by sizeof(buffer); the length result is deliberately
 * discarded, so over-long messages are silently truncated. */
235 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
238 insn->dlog(insn->dlogArg, buffer);
244 * setPrefixPresent - Marks that a particular prefix is present at a particular
247 * @param insn - The instruction to be marked as having the prefix.
248 * @param prefix - The prefix that is present.
249 * @param location - The location where the prefix is located (in the address
250 * space of the instruction's reader).
/* Records a prefix in the instruction's per-prefix tables. Both arrays are
 * indexed by `prefix` itself; readPrefixes() passes the raw prefix byte. */
252 static void setPrefixPresent(struct InternalInstruction* insn,
256 insn->prefixPresent[prefix] = 1;
/* Only one location is kept per prefix value; a repeated prefix overwrites
 * the earlier location. */
257 insn->prefixLocations[prefix] = location;
261 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
262 * present at a given location.
264 * @param insn - The instruction to be queried.
265 * @param prefix - The prefix.
266 * @param location - The location to query.
267 * @return - Whether the prefix is at that location.
/* True only when the prefix has been recorded as present AND its recorded
 * location equals `location`. The values returned on each path are not
 * visible in this listing. */
269 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
273 if (insn->prefixPresent[prefix] == 1 &&
274 insn->prefixLocations[prefix] == location)
281 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
282 * instruction as having them. Also sets the instruction's default operand,
283 * address, and other relevant data sizes to report operands correctly.
285 * @param insn - The instruction whose prefixes are to be read.
286 * @return - 0 if the instruction could be read until the end of the prefix
287 * bytes, and no prefixes conflicted; nonzero otherwise.
/* Consumes prefix bytes one at a time, classifying each into one of four
 * prefix groups, then (in 64-bit mode) looks for a REX prefix, and finally
 * derives the register/address/displacement/immediate sizes from the mode
 * and the prefixes seen.
 * NOTE(review): the loop header, several declarations (byte, opcodeByte),
 * the redundancy tests and the break/return statements are not visible in
 * this listing. */
289 static int readPrefixes(struct InternalInstruction* insn) {
290 BOOL isPrefix = TRUE;
/* One "already seen" flag per prefix group (Group 1..4 in the log text). */
291 BOOL prefixGroups[4] = { FALSE };
292 uint64_t prefixLocation;
295 BOOL hasAdSize = FALSE;
296 BOOL hasOpSize = FALSE;
298 dbgprintf(insn, "readPrefixes()");
/* Remember where this byte lives before the cursor moves past it. */
301 prefixLocation = insn->readerCursor;
303 if (consumeByte(insn, &byte))
/* Group 1: lock and repeat prefixes. */
307 case 0xf0: /* LOCK */
308 case 0xf2: /* REPNE/REPNZ */
309 case 0xf3: /* REP or REPE/REPZ */
311 dbgprintf(insn, "Redundant Group 1 prefix");
312 prefixGroups[0] = TRUE;
313 setPrefixPresent(insn, byte, prefixLocation);
/* Group 2: segment overrides; the specific override is stored in
 * insn->segmentOverride below. */
315 case 0x2e: /* CS segment override -OR- Branch not taken */
316 case 0x36: /* SS segment override -OR- Branch taken */
317 case 0x3e: /* DS segment override */
318 case 0x26: /* ES segment override */
319 case 0x64: /* FS segment override */
320 case 0x65: /* GS segment override */
323 insn->segmentOverride = SEG_OVERRIDE_CS;
326 insn->segmentOverride = SEG_OVERRIDE_SS;
329 insn->segmentOverride = SEG_OVERRIDE_DS;
332 insn->segmentOverride = SEG_OVERRIDE_ES;
335 insn->segmentOverride = SEG_OVERRIDE_FS;
338 insn->segmentOverride = SEG_OVERRIDE_GS;
341 debug("Unhandled override");
345 dbgprintf(insn, "Redundant Group 2 prefix");
346 prefixGroups[1] = TRUE;
347 setPrefixPresent(insn, byte, prefixLocation);
/* Group 3: operand-size override. */
349 case 0x66: /* Operand-size override */
351 dbgprintf(insn, "Redundant Group 3 prefix");
352 prefixGroups[2] = TRUE;
354 setPrefixPresent(insn, byte, prefixLocation);
/* Group 4: address-size override. */
356 case 0x67: /* Address-size override */
358 dbgprintf(insn, "Redundant Group 4 prefix");
359 prefixGroups[3] = TRUE;
361 setPrefixPresent(insn, byte, prefixLocation);
363 default: /* Not a prefix byte */
369 dbgprintf(insn, "Found prefix 0x%hhx", byte);
/* REX prefixes (0x40-0x4f) are only recognised in 64-bit mode. */
372 if (insn->mode == MODE_64BIT) {
373 if ((byte & 0xf0) == 0x40) {
/* Peek at the following byte: if it cannot be read, or is itself in the
 * 0x40-0x4f range, the current REX byte is reported as redundant. */
376 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
377 dbgprintf(insn, "Redundant REX prefix");
/* With a REX byte accepted, the "necessary prefix" slot is two bytes behind
 * the cursor; otherwise (below) it is one byte behind. */
381 insn->rexPrefix = byte;
382 insn->necessaryPrefixLocation = insn->readerCursor - 2;
384 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
387 insn->necessaryPrefixLocation = insn->readerCursor - 1;
/* 16-bit mode: defaults are 2 bytes; the overrides switch to 4. */
393 if (insn->mode == MODE_16BIT) {
394 insn->registerSize = (hasOpSize ? 4 : 2);
395 insn->addressSize = (hasAdSize ? 4 : 2);
396 insn->displacementSize = (hasAdSize ? 4 : 2);
397 insn->immediateSize = (hasOpSize ? 4 : 2);
/* 32-bit mode: defaults are 4 bytes; the overrides switch to 2. */
398 } else if (insn->mode == MODE_32BIT) {
399 insn->registerSize = (hasOpSize ? 2 : 4);
400 insn->addressSize = (hasAdSize ? 2 : 4);
401 insn->displacementSize = (hasAdSize ? 2 : 4);
402 insn->immediateSize = (hasOpSize ? 2 : 4);
403 } else if (insn->mode == MODE_64BIT) {
/* REX.W set: 8-byte registers, while displacement and immediate stay at 4. */
404 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
405 insn->registerSize = 8;
406 insn->addressSize = (hasAdSize ? 4 : 8);
407 insn->displacementSize = 4;
408 insn->immediateSize = 4;
/* NOTE(review): this branch and the one after it assign identical values.
 * Also, displacementSize here follows hasOpSize, whereas the 16- and 32-bit
 * branches key it on hasAdSize -- confirm this is intended. */
409 } else if (insn->rexPrefix) {
410 insn->registerSize = (hasOpSize ? 2 : 4);
411 insn->addressSize = (hasAdSize ? 4 : 8);
412 insn->displacementSize = (hasOpSize ? 2 : 4);
413 insn->immediateSize = (hasOpSize ? 2 : 4);
415 insn->registerSize = (hasOpSize ? 2 : 4);
416 insn->addressSize = (hasAdSize ? 4 : 8);
417 insn->displacementSize = (hasOpSize ? 2 : 4);
418 insn->immediateSize = (hasOpSize ? 2 : 4);
426 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
427 * extended or escape opcodes).
429 * @param insn - The instruction whose opcode is to be read.
430 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
/* Reads the opcode bytes: a single byte, or 0x0f followed by a second byte,
 * or 0x0f 0x38 / 0x0f 0x3a followed by a third byte. Sets insn->opcodeType
 * accordingly and stores the final byte in insn->opcode.
 * NOTE(review): the consumeByte() calls below show the argument as
 * "¤t"; this looks like "&current" mis-decoded via the HTML entity
 * "&curren" and does not compile as written -- confirm against the original
 * source. The declaration of `current` and the return statements are not
 * visible in this listing. */
432 static int readOpcode(struct InternalInstruction* insn) {
433 /* Determine the length of the primary opcode */
437 dbgprintf(insn, "readOpcode()");
/* Assume a one-byte opcode until an escape byte says otherwise. */
439 insn->opcodeType = ONEBYTE;
440 if (consumeByte(insn, ¤t))
/* 0x0f introduces the two-byte opcode map. */
443 if (current == 0x0f) {
444 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
446 insn->twoByteEscape = current;
448 if (consumeByte(insn, ¤t))
/* 0x0f 0x38 selects the first three-byte map. */
451 if (current == 0x38) {
452 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
454 insn->threeByteEscape = current;
456 if (consumeByte(insn, ¤t))
459 insn->opcodeType = THREEBYTE_38;
/* 0x0f 0x3a selects the second three-byte map. */
460 } else if (current == 0x3a) {
461 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
463 insn->threeByteEscape = current;
465 if (consumeByte(insn, ¤t))
468 insn->opcodeType = THREEBYTE_3A;
/* Any other second byte is itself the opcode within the two-byte map. */
470 dbgprintf(insn, "Didn't find a three-byte escape prefix");
472 insn->opcodeType = TWOBYTE;
477 * At this point we have consumed the full opcode.
478 * Anything we consume from here on must be unconsumed.
481 insn->opcode = current;
/* Forward declaration; the definition appears further down in this file. */
486 static int readModRM(struct InternalInstruction* insn);
489 * getIDWithAttrMask - Determines the ID of an instruction, consuming
490 * the ModR/M byte as appropriate for extended and escape opcodes,
491 * and using a supplied attribute mask.
493 * @param instructionID - A pointer whose target is filled in with the ID of the
495 * @param insn - The instruction whose ID is to be determined.
496 * @param attrMask - The attribute mask to search.
497 * @return - 0 if the ModR/M could be read when needed or was not
498 * needed; nonzero otherwise.
/* Maps the attribute mask to an instruction context, asks the tables whether
 * this opcode needs a ModR/M byte, and then calls decode() to obtain the
 * instruction ID, which is written through instructionID.
 * NOTE(review): the remaining parameter, the trailing arguments of the calls
 * below, and the step that obtains the ModR/M byte are not visible in this
 * listing. */
500 static int getIDWithAttrMask(uint16_t* instructionID,
501 struct InternalInstruction* insn,
503 BOOL hasModRMExtension;
505 uint8_t instructionClass;
507 instructionClass = contextForAttrs(attrMask);
509 hasModRMExtension = modRMRequired(insn->opcodeType,
/* decode() is invoked on both paths; the arguments that differ between the
 * two calls are on lines not shown here. */
513 if (hasModRMExtension) {
516 *instructionID = decode(insn->opcodeType,
521 *instructionID = decode(insn->opcodeType,
531 * is16BitEquivalent - Determines whether two instruction names refer to
532 * equivalent instructions but one is 16-bit whereas the other is not.
534 * @param orig - The instruction that is not 16-bit
535 * @param equiv - The instruction that is 16-bit
/* Walks both names in lockstep. Differences are tolerated only where a
 * character of `orig` has one of the specific counterparts in `equiv` listed
 * below.
 * NOTE(review): the identifier is spelled "is16BitEquvalent" (missing "i"),
 * unlike the header comment and is64BitEquivalent; renaming would have to
 * include the call site in getID(). The loop header and the values returned
 * from each test are not visible in this listing. */
537 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
/* Both strings ended at the same position. */
541 if (orig[i] == '\0' && equiv[i] == '\0')
/* Exactly one string ended: the lengths differ. */
543 if (orig[i] == '\0' || equiv[i] == '\0')
545 if (orig[i] != equiv[i]) {
/* Size-suffix letters: 'Q' or 'L' in orig against 'W' in equiv. */
546 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
/* Digit pairs: '6'/'3' against '1', then '4'/'2' against '6' (presumably
 * the two characters of "64"/"32" versus "16"). */
548 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
550 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
558 * is64BitEquivalent - Determines whether two instruction names refer to
559 * equivalent instructions but one is 64-bit whereas the other is not.
561 * @param orig - The instruction that is not 64-bit
562 * @param equiv - The instruction that is 64-bit
/* Walks both names in lockstep. Differences are tolerated only where a
 * character of `orig` has one of the specific counterparts in `equiv` listed
 * below.
 * NOTE(review): the loop header and the values returned from each test are
 * not visible in this listing. */
564 static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
/* Both strings ended at the same position. */
568 if (orig[i] == '\0' && equiv[i] == '\0')
/* Exactly one string ended: the lengths differ. */
570 if (orig[i] == '\0' || equiv[i] == '\0')
572 if (orig[i] != equiv[i]) {
/* Size-suffix letters: 'W' or 'L' in orig against 'Q' in equiv. */
573 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
/* Digit pairs: '1'/'3' against '6', then '6'/'2' against '4' (presumably
 * the two characters of "16"/"32" versus "64"). */
575 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
577 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
586 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
587 * appropriate for extended and escape opcodes. Determines the attributes and
588 * context for the instruction before doing so.
590 * @param insn - The instruction whose ID is to be determined.
591 * @return - 0 if the ModR/M could be read when needed or was not needed;
/* Builds an attribute mask from the mode, the REX prefix and the prefix found
 * at necessaryPrefixLocation, looks up the instruction ID, and then applies
 * two table-limitation workarounds (REX.W together with F2, and a 0x66 prefix
 * that is present but not at the necessary location).
 * NOTE(review): some declarations, arguments and return statements are not
 * visible in this listing. */
594 static int getID(struct InternalInstruction* insn) {
596 uint16_t instructionID;
598 dbgprintf(insn, "getID()");
600 attrMask = ATTR_NONE;
602 if (insn->mode == MODE_64BIT)
603 attrMask |= ATTR_64BIT;
/* Bit 3 (0x08) of the REX byte is treated as REX.W. */
605 if (insn->rexPrefix & 0x08)
606 attrMask |= ATTR_REXW;
/* Only one of 0x66 / 0xf3 / 0xf2 is honoured, in that priority order, and
 * only if it sits at necessaryPrefixLocation. The attributes set for the
 * 0xf3 and 0xf2 cases are on lines not shown here. */
608 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
609 attrMask |= ATTR_OPSIZE;
610 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
612 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
615 if (getIDWithAttrMask(&instructionID, insn, attrMask))
618 /* The following clauses compensate for limitations of the tables. */
620 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
622 * Although for SSE instructions it is usually necessary to treat REX.W+F2
623 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
624 * an occasional instruction where F2 is incidental and REX.W is the more
625 * significant. If the decoded instruction is 32-bit and adding REX.W
626 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
629 const struct InstructionSpecifier *spec;
630 uint16_t instructionIDWithREXw;
631 const struct InstructionSpecifier *specWithREXw;
633 spec = specifierForUID(instructionID);
/* Second lookup with ATTR_XD cleared from the mask. */
635 if (getIDWithAttrMask(&instructionIDWithREXw,
637 attrMask & (~ATTR_XD))) {
639 * Decoding with REX.w would yield nothing; give up and return original
643 insn->instructionID = instructionID;
648 specWithREXw = specifierForUID(instructionIDWithREXw);
/* Adopt the alternative only if its name is the 64-bit counterpart of the
 * originally decoded name; otherwise keep the original ID. */
650 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
651 insn->instructionID = instructionIDWithREXw;
652 insn->spec = specWithREXw;
654 insn->instructionID = instructionID;
/* 0x66 was seen somewhere, but not where it would have set ATTR_OPSIZE. */
660 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
662 * The instruction tables make no distinction between instructions that
663 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
664 * particular spot (i.e., many MMX operations). In general we're
665 * conservative, but in the specific case where OpSize is present but not
666 * in the right place we check if there's a 16-bit operation.
669 const struct InstructionSpecifier *spec;
670 uint16_t instructionIDWithOpsize;
671 const struct InstructionSpecifier *specWithOpsize;
673 spec = specifierForUID(instructionID);
/* Second lookup with ATTR_OPSIZE forced on. */
675 if (getIDWithAttrMask(&instructionIDWithOpsize,
677 attrMask | ATTR_OPSIZE)) {
679 * ModRM required with OpSize but not present; give up and return version
683 insn->instructionID = instructionID;
688 specWithOpsize = specifierForUID(instructionIDWithOpsize);
/* Adopt the alternative only if its name is the 16-bit counterpart of the
 * originally decoded name; otherwise keep the original ID. */
690 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
691 insn->instructionID = instructionIDWithOpsize;
692 insn->spec = specWithOpsize;
694 insn->instructionID = instructionID;
/* Default path: keep the first lookup and resolve its specifier. */
700 insn->instructionID = instructionID;
701 insn->spec = specifierForUID(insn->instructionID);
707 * readSIB - Consumes the SIB byte to determine addressing information for an
710 * @param insn - The instruction whose SIB byte is to be read.
711 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
/* Consumes the SIB byte (once per instruction) and fills in insn->sibIndex,
 * insn->sibBase and, for the special base case, insn->eaDisplacement.
 * NOTE(review): case labels, the scale handling and several return/break
 * statements are not visible in this listing. */
713 static int readSIB(struct InternalInstruction* insn) {
/* Enum origins that the raw index/base fields are added to. */
714 SIBIndex sibIndexBase = 0;
715 SIBBase sibBaseBase = 0;
718 dbgprintf(insn, "readSIB()");
/* The consumedSIB flag makes repeated calls skip re-reading the byte. */
720 if (insn->consumedSIB)
723 insn->consumedSIB = TRUE;
/* Origins are chosen by address size: the EAX-based pair or the RAX-based
 * pair (presumably for sizes 4 and 8 respectively). */
725 switch (insn->addressSize) {
727 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
731 sibIndexBase = SIB_INDEX_EAX;
732 sibBaseBase = SIB_BASE_EAX;
735 sibIndexBase = SIB_INDEX_RAX;
736 sibBaseBase = SIB_BASE_RAX;
740 if (consumeByte(insn, &insn->sib))
/* The index field is extended with REX.X as bit 3. */
743 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
747 insn->sibIndex = SIB_INDEX_NONE;
/* An index that lands on the sib/sib64 placeholder value means "no index". */
750 insn->sibIndex = (EABase)(sibIndexBase + index);
751 if (insn->sibIndex == SIB_INDEX_sib ||
752 insn->sibIndex == SIB_INDEX_sib64)
753 insn->sibIndex = SIB_INDEX_NONE;
757 switch (scaleFromSIB(insn->sib)) {
/* The base field is extended with REX.B as bit 3. */
772 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
/* For the special base encoding the mod field decides the outcome: no base
 * with a 32-bit displacement, or EBP/RBP with an 8- or 32-bit displacement
 * (presumably mod 0, 1 and 2 in that order). */
776 switch (modFromModRM(insn->modRM)) {
778 insn->eaDisplacement = EA_DISP_32;
779 insn->sibBase = SIB_BASE_NONE;
782 insn->eaDisplacement = EA_DISP_8;
783 insn->sibBase = (insn->addressSize == 4 ?
784 SIB_BASE_EBP : SIB_BASE_RBP);
787 insn->eaDisplacement = EA_DISP_32;
788 insn->sibBase = (insn->addressSize == 4 ?
789 SIB_BASE_EBP : SIB_BASE_RBP);
792 debug("Cannot have Mod = 0b11 and a SIB byte");
/* Ordinary case: the base register is origin + base field. */
797 insn->sibBase = (EABase)(sibBaseBase + base);
805 * readDisplacement - Consumes the displacement of an instruction.
807 * @param insn - The instruction whose displacement is to be read.
808 * @return - 0 if the displacement byte was successfully read; nonzero
/* Reads the displacement whose width is given by insn->eaDisplacement, using
 * the signed 8/16/32-bit readers, and stores it in insn->displacement.
 * NOTE(review): the local declarations, case labels and return statements
 * are not visible in this listing. */
811 static int readDisplacement(struct InternalInstruction* insn) {
816 dbgprintf(insn, "readDisplacement()");
/* The consumedDisplacement flag makes repeated calls skip re-reading. */
818 if (insn->consumedDisplacement)
821 insn->consumedDisplacement = TRUE;
823 switch (insn->eaDisplacement) {
/* One path clears the flag again (presumably the "no displacement" case). */
825 insn->consumedDisplacement = FALSE;
/* The values are read as signed types before being stored. */
828 if (consumeInt8(insn, &d8))
830 insn->displacement = d8;
833 if (consumeInt16(insn, &d16))
835 insn->displacement = d16;
838 if (consumeInt32(insn, &d32))
840 insn->displacement = d32;
844 insn->consumedDisplacement = TRUE;
849 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
850 * displacement) for an instruction and interprets it.
852 * @param insn - The instruction whose addressing information is to be read.
853 * @return - 0 if the information was successfully read; nonzero otherwise.
/* Consumes the ModR/M byte (once per instruction), splits it into mod/reg/rm,
 * applies the REX.R and REX.B extensions, and fills in insn->reg, insn->eaBase
 * and insn->eaDisplacement. Displacements are consumed via readDisplacement().
 * NOTE(review): case labels, the readSIB() calls and return statements are
 * not visible in this listing. */
855 static int readModRM(struct InternalInstruction* insn) {
856 uint8_t mod, rm, reg;
858 dbgprintf(insn, "readModRM()");
/* The consumedModRM flag makes repeated calls skip re-reading the byte. */
860 if (insn->consumedModRM)
/* NOTE(review): the return value of consumeByte() is ignored here, unlike
 * every other consumeByte() call in this file. On a failed read insn->modRM
 * keeps the zero it received from memset() in decodeInstruction(), and
 * decoding continues as if the byte were 0x00 -- confirm whether this should
 * propagate the error. */
863 consumeByte(insn, &insn->modRM);
864 insn->consumedModRM = TRUE;
866 mod = modFromModRM(insn->modRM);
867 rm = rmFromModRM(insn->modRM);
868 reg = regFromModRM(insn->modRM);
871 * This goes by insn->registerSize to pick the correct register, which messes
872 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
/* Register enum origins: AX-, EAX- or RAX-based depending on registerSize. */
875 switch (insn->registerSize) {
877 insn->regBase = MODRM_REG_AX;
878 insn->eaRegBase = EA_REG_AX;
881 insn->regBase = MODRM_REG_EAX;
882 insn->eaRegBase = EA_REG_EAX;
885 insn->regBase = MODRM_REG_RAX;
886 insn->eaRegBase = EA_REG_RAX;
/* REX.R extends reg and REX.B extends rm, each contributing bit 3. */
890 reg |= rFromREX(insn->rexPrefix) << 3;
891 rm |= bFromREX(insn->rexPrefix) << 3;
893 insn->reg = (Reg)(insn->regBase + reg);
895 switch (insn->addressSize) {
/* First address-size branch: memory bases count from EA_BASE_BX_SI and the
 * displacements are 8 or 16 bits wide. */
897 insn->eaBaseBase = EA_BASE_BX_SI;
/* Displacement-only form: no base register, 16-bit displacement. */
902 insn->eaBase = EA_BASE_NONE;
903 insn->eaDisplacement = EA_DISP_16;
904 if (readDisplacement(insn))
907 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
908 insn->eaDisplacement = EA_DISP_NONE;
912 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
913 insn->eaDisplacement = EA_DISP_8;
914 if (readDisplacement(insn))
918 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
919 insn->eaDisplacement = EA_DISP_16;
920 if (readDisplacement(insn))
/* Register-direct form: rm selects a register relative to eaRegBase. */
924 insn->eaBase = (EABase)(insn->eaRegBase + rm);
925 if (readDisplacement(insn))
/* Second address-size branch: memory bases count from EA_BASE_EAX when
 * addressSize is 4, otherwise from EA_BASE_RAX. */
932 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
936 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
/* The 0xc label catches the SIB-escape rm value once REX.B has set bit 3. */
939 case 0xc: /* in case REXW.b is set */
940 insn->eaBase = (insn->addressSize == 4 ?
941 EA_BASE_sib : EA_BASE_sib64);
943 if (readDisplacement(insn))
/* Displacement-only form: no base register, 32-bit displacement. */
947 insn->eaBase = EA_BASE_NONE;
948 insn->eaDisplacement = EA_DISP_32;
949 if (readDisplacement(insn))
953 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
/* mod 0x1 selects an 8-bit displacement; the other mod value handled here
 * selects a 32-bit one. */
959 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
/* NOTE(review): this path always stores EA_BASE_sib, whereas the earlier SIB
 * path picks EA_BASE_sib or EA_BASE_sib64 by addressSize -- confirm whether
 * the 64-bit variant is intended here as well. */
962 case 0xc: /* in case REXW.b is set */
963 insn->eaBase = EA_BASE_sib;
965 if (readDisplacement(insn))
969 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
970 if (readDisplacement(insn))
/* Register-direct form: no displacement, rm selects a register. */
976 insn->eaDisplacement = EA_DISP_NONE;
977 insn->eaBase = (EABase)(insn->eaRegBase + rm);
981 } /* switch (insn->addressSize) */
/* GENERIC_FIXUP_FUNC(name, base, prefix) generates a function `name` that
 * converts a raw register index into the enum value for the operand's
 * register class. The visible outcomes are: `base + index`; the 8-bit
 * registers, where indices 4-7 map to prefix##_SPL + (index - 4) when a REX
 * prefix is present and to prefix##_AL + index otherwise; the _AX, _EAX and
 * _RAX families; _XMM0 and _MM0; and, for TYPE_SEGMENTREG, TYPE_DEBUGREG and
 * TYPE_CONTROLREG, the _ES, _DR0 and _CR0 families respectively.
 * NOTE(review): most case labels, the remaining parameters and any validity
 * checks are not visible in this listing. */
986 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
987 static uint8_t name(struct InternalInstruction *insn, \
994 debug("Unhandled register type"); \
998 return base + index; \
1000 if (insn->rexPrefix && \
1001 index >= 4 && index <= 7) { \
1002 return prefix##_SPL + (index - 4); \
1004 return prefix##_AL + index; \
1007 return prefix##_AX + index; \
1009 return prefix##_EAX + index; \
1011 return prefix##_RAX + index; \
1016 return prefix##_XMM0 + index; \
1022 return prefix##_MM0 + index; \
1023 case TYPE_SEGMENTREG: \
1026 return prefix##_ES + index; \
1027 case TYPE_DEBUGREG: \
1030 return prefix##_DR0 + index; \
1031 case TYPE_CONTROLREG: \
1034 return prefix##_CR0 + index; \
1039 * fixup*Value - Consults an operand type to determine the meaning of the
1040 * reg or R/M field. If the operand is an XMM operand, for example, an
1041 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1042 * misinterpret it as.
1044 * @param insn - The instruction containing the operand.
1045 * @param type - The operand type.
1046 * @param index - The existing value of the field as reported by readModRM().
1047 * @param valid - The address of a uint8_t. The target is set to 1 if the
1048 * field is valid for the register class; 0 if not.
1049 * @return - The proper value.
/* Two instantiations: fixupRegValue works relative to insn->regBase with the
 * MODRM_REG_* enumerators, fixupRMValue relative to insn->eaRegBase with the
 * EA_REG_* enumerators. */
1051 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1052 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1055 * fixupReg - Consults an operand specifier to determine which of the
1056 * fixup*Value functions to use in correcting readModRM()'s interpretation.
1058 * @param insn - See fixup*Value().
1059 * @param op - The operand specifier.
1060 * @return - 0 if fixup was successful; -1 if the register returned was
1061 * invalid for its class.
/* Re-interprets insn->reg or insn->eaBase according to the operand's type,
 * dispatching on the operand's encoding to fixupRegValue() or fixupRMValue().
 * NOTE(review): the case labels, the trailing call arguments and the
 * validity checks are not visible in this listing. */
1063 static int fixupReg(struct InternalInstruction *insn,
1064 const struct OperandSpecifier *op) {
1067 dbgprintf(insn, "fixupReg()");
1069 switch ((OperandEncoding)op->encoding) {
1071 debug("Expected a REG or R/M encoding in fixupReg");
/* The raw index handed to the fixup is the current value minus regBase. */
1074 insn->reg = (Reg)fixupRegValue(insn,
1075 (OperandType)op->type,
1076 insn->reg - insn->regBase,
/* eaBase is only rewritten when it is at or above eaRegBase, i.e. when it
 * already denotes a register rather than a memory base. */
1082 if (insn->eaBase >= insn->eaRegBase) {
1083 insn->eaBase = (EABase)fixupRMValue(insn,
1084 (OperandType)op->type,
1085 insn->eaBase - insn->eaRegBase,
1097 * readOpcodeModifier - Reads an operand from the opcode field of an
1098 * instruction. Handles AddRegFrm instructions.
1100 * @param insn - The instruction whose opcode field is to be read.
1101 * @param inModRM - Indicates that the opcode field is to be read from the
1102 * ModR/M extension; useful for escape opcodes
1103 * @return - 0 on success; nonzero otherwise.
/* Computes insn->opcodeModifier (once per instruction) as the distance of
 * either the opcode byte or the ModR/M byte from the specifier's
 * modifierBase, depending on the specifier's modifierType.
 * NOTE(review): the header comment above documents an `inModRM` parameter,
 * but this signature takes only `insn`. Some case labels and the return
 * statements are not visible in this listing. */
1105 static int readOpcodeModifier(struct InternalInstruction* insn) {
1106 dbgprintf(insn, "readOpcodeModifier()");
/* The consumedOpcodeModifier flag makes repeated calls skip recomputation. */
1108 if (insn->consumedOpcodeModifier)
1111 insn->consumedOpcodeModifier = TRUE;
1113 switch (insn->spec->modifierType) {
1115 debug("Unknown modifier type.");
1118 debug("No modifier but an operand expects one.");
1120 case MODIFIER_OPCODE:
1121 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1123 case MODIFIER_MODRM:
1124 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1130 * readOpcodeRegister - Reads an operand from the opcode field of an
1131 * instruction and interprets it appropriately given the operand width.
1132 * Handles AddRegFrm instructions.
1134 * @param insn - See readOpcodeModifier().
1135 * @param size - The width (in bytes) of the register being specified.
1136 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1138 * @return - 0 on success; nonzero otherwise.
/* Derives insn->opcodeRegister from the opcode modifier, extended with REX.B
 * as bit 3, relative to the register family for the requested width.
 * NOTE(review): the switch on `size`, its case labels and the return
 * statements are not visible in this listing. */
1140 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1141 dbgprintf(insn, "readOpcodeRegister()");
1143 if (readOpcodeModifier(insn))
/* Falls back to the instruction's register size (presumably when the caller
 * passes size 0, as readOperands() does in one of its cases). */
1147 size = insn->registerSize;
/* 8-bit family: with a REX prefix, values AL+4 .. AL+7 are remapped into the
 * range starting at MODRM_REG_SPL. */
1151 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1152 | insn->opcodeModifier));
1153 if (insn->rexPrefix &&
1154 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1155 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1156 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1157 + (insn->opcodeRegister - MODRM_REG_AL - 4));
/* 16-, 32- and 64-bit families: same computation from AX, EAX and RAX. */
1162 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1163 + ((bFromREX(insn->rexPrefix) << 3)
1164 | insn->opcodeModifier));
1167 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1168 + ((bFromREX(insn->rexPrefix) << 3)
1169 | insn->opcodeModifier));
1172 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1173 + ((bFromREX(insn->rexPrefix) << 3)
1174 | insn->opcodeModifier));
1182 * readImmediate - Consumes an immediate operand from an instruction, given the
1183 * desired operand size.
1185 * @param insn - The instruction whose operand is to be read.
1186 * @param size - The width (in bytes) of the operand.
1187 * @return - 0 if the immediate was successfully consumed; nonzero
/* Reads one immediate of the given width into the next free slot of
 * insn->immediates and bumps insn->numImmediatesConsumed.
 * NOTE(review): the local declarations, the switch on `size` with its case
 * labels, and the return statements are not visible in this listing. */
1190 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1196 dbgprintf(insn, "readImmediate()");
/* At most two immediates are accepted per instruction. */
1198 if (insn->numImmediatesConsumed == 2) {
1199 debug("Already consumed two immediates");
/* Either the size is taken from insn->immediateSize, or insn->immediateSize
 * is overwritten with the requested size; the condition choosing between
 * the two is not visible here. */
1204 size = insn->immediateSize;
1206 insn->immediateSize = size;
/* All widths are read through the byte reader or the unsigned readers. */
1210 if (consumeByte(insn, &imm8))
1212 insn->immediates[insn->numImmediatesConsumed] = imm8;
1215 if (consumeUInt16(insn, &imm16))
1217 insn->immediates[insn->numImmediatesConsumed] = imm16;
1220 if (consumeUInt32(insn, &imm32))
1222 insn->immediates[insn->numImmediatesConsumed] = imm32;
1225 if (consumeUInt64(insn, &imm64))
1227 insn->immediates[insn->numImmediatesConsumed] = imm64;
1231 insn->numImmediatesConsumed++;
1237 * readOperands - Consults the specifier for an instruction and consumes all
1238 * operands for that instruction, interpreting them as it goes.
1240 * @param insn - The instruction whose operands are to be read and interpreted.
1241 * @return - 0 if all operands could be read; nonzero otherwise.
/* Iterates over all X86_MAX_OPERANDS operand slots of the instruction's
 * specifier and, depending on each slot's encoding, reads the ModR/M
 * information, an immediate, or an opcode-embedded register/modifier.
 * NOTE(review): the case labels and the return/break statements are not
 * visible in this listing. */
1243 static int readOperands(struct InternalInstruction* insn) {
1246 dbgprintf(insn, "readOperands()");
1248 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1249 switch (insn->spec->operands[index].encoding) {
/* ModR/M-based operands: read the addressing bytes, then correct the
 * register interpretation for this operand's type. */
1254 if (readModRM(insn))
1256 if (fixupReg(insn, &insn->spec->operands[index]))
1265 dbgprintf(insn, "We currently don't hande code-offset encodings");
/* One-byte immediate; an operand of TYPE_IMM3 is additionally checked
 * against the maximum value 7. */
1268 if (readImmediate(insn, 1))
1270 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1271 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
/* Fixed-width immediates of 2, 4 and 8 bytes. */
1275 if (readImmediate(insn, 2))
1279 if (readImmediate(insn, 4))
1283 if (readImmediate(insn, 8))
/* Immediates whose width follows the instruction's immediate size or its
 * address size. */
1287 if (readImmediate(insn, insn->immediateSize))
1291 if (readImmediate(insn, insn->addressSize))
/* Registers encoded in the opcode, with explicit widths 1, 2, 4 and 8, then
 * width 0. */
1295 if (readOpcodeRegister(insn, 1))
1299 if (readOpcodeRegister(insn, 2))
1303 if (readOpcodeRegister(insn, 4))
1307 if (readOpcodeRegister(insn, 8))
1311 if (readOpcodeRegister(insn, 0))
1315 if (readOpcodeModifier(insn))
1320 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1329 * decodeInstruction - Reads and interprets a full instruction provided by the
1332 * @param insn - A pointer to the instruction to be populated. Must be
1334 * @param reader - The function to be used to read the instruction's bytes.
1335 * @param readerArg - A generic argument to be passed to the reader to store
1336 * any internal state.
1337 * @param logger - If non-NULL, the function to be used to write log messages
1339 * @param loggerArg - A generic argument to be passed to the logger to store
1340 * any internal state.
1341 * @param startLoc - The address (in the reader's address space) of the first
1342 * byte in the instruction.
1343 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1344 * decode the instruction in.
1345 * @return - 0 if the instruction's memory could be read; nonzero if
/* Public entry point: zeroes the instruction record, installs the reader and
 * logger callbacks, runs the decoding stages starting with readPrefixes(),
 * and records the instruction length.
 * NOTE(review): several parameters, the remaining stages of the || chain and
 * the return statements are not visible in this listing. */
1348 int decodeInstruction(struct InternalInstruction* insn,
1349 byteReader_t reader,
1354 DisassemblerMode mode) {
/* Every field starts out as zero; the consumed* flags and rexPrefix rely on
 * this. */
1355 memset(insn, 0, sizeof(struct InternalInstruction));
1357 insn->reader = reader;
1358 insn->readerArg = readerArg;
1359 insn->dlog = logger;
1360 insn->dlogArg = loggerArg;
1361 insn->startLocation = startLoc;
1362 insn->readerCursor = startLoc;
/* Already zero after the memset above; kept as an explicit reset. */
1364 insn->numImmediatesConsumed = 0;
/* The stages are chained with ||, so the first failing stage stops the
 * chain. An instruction ID of 0 counts as a failure as well. */
1366 if (readPrefixes(insn) ||
1369 insn->instructionID == 0 ||
/* Length is the distance the cursor travelled from the start location. */
1373 insn->length = insn->readerCursor - insn->startLocation;
/* NOTE(review): %llx and %zu assume the argument types match unsigned long
 * long and size_t respectively -- confirm against the field declarations. */
1375 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1376 startLoc, insn->readerCursor, insn->length);
/* Over-long instructions are only logged here. */
1378 if (insn->length > 15)
1379 dbgprintf(insn, "Instruction exceeds 15-byte limit");