*
*===----------------------------------------------------------------------===*/
-#include <assert.h> /* for assert() */
#include <stdarg.h> /* for va_*() */
#include <stdio.h> /* for vsnprintf() */
#include <stdlib.h> /* for exit() */
#define TRUE 1
#define FALSE 0
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
-#define NORETURN
+#define debug(s) do { } while (0)
#endif
-#define unreachable(s) \
- do { \
- fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s); \
- exit(-1); \
- } while (0);
/*
* contextForAttrs - Client for the instruction context table. Takes a set of
* @return - The InstructionContext to use when looking up an
* instruction with these attributes.
*/
-static inline InstructionContext contextForAttrs(uint8_t attrMask) {
+static InstructionContext contextForAttrs(uint8_t attrMask) {
return CONTEXTS_SYM[attrMask];
}
* ModR/M extensions and escapes.
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
*/
-static inline int modRMRequired(OpcodeType type,
+static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
- const struct ContextDecision* decision;
+ const struct ContextDecision* decision = 0;
switch (type) {
case ONEBYTE:
case THREEBYTE_3A:
decision = &THREEBYTE3A_SYM;
break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
- unreachable("Unknown opcode type");
return 0;
}
* @param insnContext - See modRMRequired().
* @param opcode - See modRMRequired().
* @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
*/
-static inline InstrUID decode(OpcodeType type,
- InstructionContext insnContext,
- uint8_t opcode,
- uint8_t modRM) {
- struct ModRMDecision* dec;
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec;
switch (type) {
default:
- unreachable("Unknown opcode type");
+ debug("Unknown opcode type");
+ return 0;
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case THREEBYTE_3A:
dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
default:
- unreachable("Corrupt table! Unknown modrm_type");
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
case MODRM_ONEENTRY:
return dec->instructionIDs[0];
case MODRM_SPLITRM:
case MODRM_FULL:
return dec->instructionIDs[modRM];
}
-
- return 0;
}
/*
* decode(); specifierForUID will not check bounds.
* @return - A pointer to the specification for that instruction.
*/
-static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
return &INSTRUCTIONS_SYM[uid];
}
* with the data read.
* @return - 0 if the read was successful; nonzero otherwise.
*/
-static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
if (!ret)
* @param byte - See consumeByte().
* @return - See consumeByte().
*/
-static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
return insn->reader(insn->readerArg, byte, insn->readerCursor);
}
-static inline void unconsumeByte(struct InternalInstruction* insn) {
+static void unconsumeByte(struct InternalInstruction* insn) {
insn->readerCursor--;
}
-#define CONSUME_FUNC(name, type) \
- static inline int name(struct InternalInstruction* insn, type* ptr) { \
- type combined = 0; \
- unsigned offset; \
- for (offset = 0; offset < sizeof(type); ++offset) { \
- uint8_t byte; \
- int ret = insn->reader(insn->readerArg, \
- &byte, \
- insn->readerCursor + offset); \
- if (ret) \
- return ret; \
- combined = combined | ((type)byte << ((type)offset * 8)); \
- } \
- *ptr = combined; \
- insn->readerCursor += sizeof(type); \
- return 0; \
+#define CONSUME_FUNC(name, type) \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
}
/*
CONSUME_FUNC(consumeUInt64, uint64_t)
/*
- * dprintf - Uses the logging function provided by the user to log a single
+ * dbgprintf - Uses the logging function provided by the user to log a single
* message, typically without a carriage-return.
*
* @param insn - The instruction containing the logging function.
* @param format - See printf().
* @param ... - See printf().
*/
-static inline void dprintf(struct InternalInstruction* insn,
- const char* format,
- ...) {
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
char buffer[256];
va_list ap;
* @param location - The location where the prefix is located (in the address
* space of the instruction's reader).
*/
-static inline void setPrefixPresent(struct InternalInstruction* insn,
+static void setPrefixPresent(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
* @param location - The location to query.
* @return - Whether the prefix is at that location.
*/
-static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
- uint8_t prefix,
- uint64_t location)
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
{
if (insn->prefixPresent[prefix] == 1 &&
insn->prefixLocations[prefix] == location)
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
- uint8_t byte;
+ uint8_t byte = 0;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
- dprintf(insn, "readPrefixes()");
+ dbgprintf(insn, "readPrefixes()");
while (isPrefix) {
prefixLocation = insn->readerCursor;
case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP or REPE/REPZ */
if (prefixGroups[0])
- dprintf(insn, "Redundant Group 1 prefix");
+ dbgprintf(insn, "Redundant Group 1 prefix");
prefixGroups[0] = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
insn->segmentOverride = SEG_OVERRIDE_GS;
break;
default:
- unreachable("Unhandled override");
+ debug("Unhandled override");
+ return -1;
}
if (prefixGroups[1])
- dprintf(insn, "Redundant Group 2 prefix");
+ dbgprintf(insn, "Redundant Group 2 prefix");
prefixGroups[1] = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x66: /* Operand-size override */
if (prefixGroups[2])
- dprintf(insn, "Redundant Group 3 prefix");
+ dbgprintf(insn, "Redundant Group 3 prefix");
prefixGroups[2] = TRUE;
hasOpSize = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x67: /* Address-size override */
if (prefixGroups[3])
- dprintf(insn, "Redundant Group 4 prefix");
+ dbgprintf(insn, "Redundant Group 4 prefix");
prefixGroups[3] = TRUE;
hasAdSize = TRUE;
setPrefixPresent(insn, byte, prefixLocation);
}
if (isPrefix)
- dprintf(insn, "Found prefix 0x%hhx", byte);
+ dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
+
+ insn->vexSize = 0;
- if (insn->mode == MODE_64BIT) {
- if ((byte & 0xf0) == 0x40) {
- uint8_t opcodeByte;
+ if (byte == 0xc4) {
+ uint8_t byte1;
- if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
- dprintf(insn, "Redundant REX prefix");
- return -1;
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
}
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
- insn->rexPrefix = byte;
- insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
- dprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
- } else {
- unconsumeByte(insn);
}
-
+
if (insn->mode == MODE_16BIT) {
insn->registerSize = (hasOpSize ? 4 : 2);
insn->addressSize = (hasAdSize ? 4 : 2);
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 2 : 4);
insn->displacementSize = (hasAdSize ? 2 : 4);
- insn->immediateSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
uint8_t current;
- dprintf(insn, "readOpcode()");
+ dbgprintf(insn, "readOpcode()");
insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
if (consumeByte(insn, ¤t))
return -1;
if (current == 0x0f) {
- dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+ dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
insn->twoByteEscape = current;
return -1;
if (current == 0x38) {
- dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
insn->threeByteEscape = current;
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
- dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
insn->threeByteEscape = current;
return -1;
insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, ¤t))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, ¤t))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
} else {
- dprintf(insn, "Didn't find a three-byte escape prefix");
+ dbgprintf(insn, "Didn't find a three-byte escape prefix");
insn->opcodeType = TWOBYTE;
}
insn->opcode);
if (hasModRMExtension) {
- readModRM(insn);
+ if (readModRM(insn))
+ return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
off_t i;
- for(i = 0;; i++) {
- if(orig[i] == '\0' && equiv[i] == '\0')
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
- if(orig[i] == '\0' || equiv[i] == '\0')
+ if (orig[i] == '\0' || equiv[i] == '\0')
return FALSE;
- if(orig[i] != equiv[i]) {
- if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
continue;
- if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
continue;
- if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
continue;
return FALSE;
}
static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
off_t i;
- for(i = 0;; i++) {
- if(orig[i] == '\0' && equiv[i] == '\0')
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
- if(orig[i] == '\0' || equiv[i] == '\0')
+ if (orig[i] == '\0' || equiv[i] == '\0')
return FALSE;
- if(orig[i] != equiv[i]) {
- if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
continue;
- if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+ if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
continue;
- if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+ if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
continue;
return FALSE;
}
uint8_t attrMask;
uint16_t instructionID;
- dprintf(insn, "getID()");
+ dbgprintf(insn, "getID()");
attrMask = ATTR_NONE;
-
+
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
- if (insn->rexPrefix & 0x08)
- attrMask |= ATTR_REXW;
-
- if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
- attrMask |= ATTR_OPSIZE;
- else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XS;
- else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XD;
-
- if(getIDWithAttrMask(&instructionID, insn, attrMask))
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (wFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_REXW;
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ }
+
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
/* The following clauses compensate for limitations of the tables. */
* instead of F2 changes a 32 to a 64, we adopt the new encoding.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithREXw;
- struct InstructionSpecifier* specWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
spec = specifierForUID(instructionID);
* in the right place we check if there's a 16-bit operation.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- struct InstructionSpecifier* specWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
spec = specifierForUID(instructionID);
* @return - 0 if the SIB byte was successfully read; nonzero otherwise.
*/
static int readSIB(struct InternalInstruction* insn) {
- SIBIndex sibIndexBase;
- SIBBase sibBaseBase;
+ SIBIndex sibIndexBase = 0;
+ SIBBase sibBaseBase = 0;
uint8_t index, base;
- dprintf(insn, "readSIB()");
+ dbgprintf(insn, "readSIB()");
if (insn->consumedSIB)
return 0;
switch (insn->addressSize) {
case 2:
- dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
return -1;
break;
case 4:
insn->sibIndex = SIB_INDEX_NONE;
break;
default:
- insn->sibIndex = (EABase)(sibIndexBase + index);
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
if (insn->sibIndex == SIB_INDEX_sib ||
insn->sibIndex == SIB_INDEX_sib64)
insn->sibIndex = SIB_INDEX_NONE;
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x3:
- unreachable("Cannot have Mod = 0b11 and a SIB byte");
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
}
break;
default:
- insn->sibBase = (EABase)(sibBaseBase + base);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
int16_t d16;
int32_t d32;
- dprintf(insn, "readDisplacement()");
+ dbgprintf(insn, "readDisplacement()");
if (insn->consumedDisplacement)
return 0;
static int readModRM(struct InternalInstruction* insn) {
uint8_t mod, rm, reg;
- dprintf(insn, "readModRM()");
+ dbgprintf(insn, "readModRM()");
if (insn->consumedModRM)
return 0;
- consumeByte(insn, &insn->modRM);
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
*/
switch (insn->registerSize) {
case 2:
- insn->regBase = REG_AX;
+ insn->regBase = MODRM_REG_AX;
insn->eaRegBase = EA_REG_AX;
break;
case 4:
- insn->regBase = REG_EAX;
+ insn->regBase = MODRM_REG_EAX;
insn->eaRegBase = EA_REG_EAX;
break;
case 8:
- insn->regBase = REG_RAX;
+ insn->regBase = MODRM_REG_RAX;
insn->eaRegBase = EA_REG_RAX;
break;
}
if (rm == 0x6) {
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_16;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
} else {
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
case 0x1:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_8;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x2:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_16;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x3:
insn->eaBase = (EABase)(insn->eaRegBase + rm);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
}
insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x5:
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_32;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
default:
case 0xc: /* in case REXW.b is set */
insn->eaBase = EA_BASE_sib;
readSIB(insn);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
default:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
}
*valid = 1; \
switch (type) { \
default: \
- unreachable("Unhandled register type"); \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
case TYPE_Rv: \
return base + index; \
case TYPE_R8: \
- if(insn->rexPrefix && \
+ if (insn->rexPrefix && \
index >= 4 && index <= 7) { \
return prefix##_SPL + (index - 4); \
} else { \
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
case TYPE_XMM128: \
case TYPE_XMM64: \
case TYPE_XMM32: \
case TYPE_MM64: \
case TYPE_MM32: \
case TYPE_MM: \
- if(index > 7) \
+ if (index > 7) \
*valid = 0; \
return prefix##_MM0 + index; \
case TYPE_SEGMENTREG: \
- if(index > 5) \
+ if (index > 5) \
*valid = 0; \
return prefix##_ES + index; \
case TYPE_DEBUGREG: \
- if(index > 7) \
+ if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
- case TYPE_CR32: \
- if(index > 7) \
- *valid = 0; \
- return prefix##_ECR0 + index; \
- case TYPE_CR64: \
- if(index > 8) \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
*valid = 0; \
- return prefix##_RCR0 + index; \
+ return prefix##_CR0 + index; \
} \
}
* @param index - The existing value of the field as reported by readModRM().
* @param valid - The address of a uint8_t. The target is set to 1 if the
* field is valid for the register class; 0 if not.
+ * @return - The proper value.
*/
-GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, REG)
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
/*
* invalid for its class.
*/
static int fixupReg(struct InternalInstruction *insn,
- struct OperandSpecifier *op) {
+ const struct OperandSpecifier *op) {
uint8_t valid;
- dprintf(insn, "fixupReg()");
+ dbgprintf(insn, "fixupReg()");
switch ((OperandEncoding)op->encoding) {
default:
- unreachable("Expected a REG or R/M encoding in fixupReg");
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
* @param insn - The instruction whose opcode field is to be read.
* @param inModRM - Indicates that the opcode field is to be read from the
* ModR/M extension; useful for escape opcodes
+ * @return - 0 on success; nonzero otherwise.
*/
-static void readOpcodeModifier(struct InternalInstruction* insn) {
- dprintf(insn, "readOpcodeModifier()");
+static int readOpcodeModifier(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readOpcodeModifier()");
if (insn->consumedOpcodeModifier)
- return;
+ return 0;
insn->consumedOpcodeModifier = TRUE;
- switch(insn->spec->modifierType) {
+ switch (insn->spec->modifierType) {
default:
- unreachable("Unknown modifier type.");
+ debug("Unknown modifier type.");
+ return -1;
case MODIFIER_NONE:
- unreachable("No modifier but an operand expects one.");
+ debug("No modifier but an operand expects one.");
+ return -1;
case MODIFIER_OPCODE:
insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
- break;
+ return 0;
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
- break;
+ return 0;
}
}
* @param size - The width (in bytes) of the register being specified.
* 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
* RAX.
+ * @return - 0 on success; nonzero otherwise.
*/
-static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
- dprintf(insn, "readOpcodeRegister()");
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dbgprintf(insn, "readOpcodeRegister()");
- readOpcodeModifier(insn);
+ if (readOpcodeModifier(insn))
+ return -1;
if (size == 0)
size = insn->registerSize;
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
- if(insn->rexPrefix &&
- insn->opcodeRegister >= REG_AL + 0x4 &&
- insn->opcodeRegister < REG_AL + 0x8) {
- insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4));
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix &&
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
}
break;
case 2:
- insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
case 4:
- insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
case 8:
- insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3)
- |insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
}
+
+ return 0;
}
/*
uint32_t imm32;
uint64_t imm64;
- dprintf(insn, "readImmediate()");
+ dbgprintf(insn, "readImmediate()");
- if (insn->numImmediatesConsumed == 2)
- unreachable("Already consumed two immediates");
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
if (size == 0)
size = insn->immediateSize;
return 0;
}
+/*
+ * readVVVV - Extracts the vvvv register field from an instruction's VEX
+ * prefix and stores it in the instruction.
+ *
+ * @param insn - The instruction whose VEX prefix is to be examined.
+ * @return - 0 if the vvvv field was successfully read; nonzero if the
+ * instruction has no two- or three-byte VEX prefix.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ return 0;
+}
+
/*
* readOperands - Consults the specifier for an instruction and consumes all
* operands for that instruction, interpreting them as it goes.
static int readOperands(struct InternalInstruction* insn) {
int index;
- dprintf(insn, "readOperands()");
+ dbgprintf(insn, "readOperands()");
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (insn->spec->operands[index].encoding) {
case ENCODING_CP:
case ENCODING_CO:
case ENCODING_CT:
- dprintf(insn, "We currently don't hande code-offset encodings");
+ dbgprintf(insn, "We currently don't hande code-offset encodings");
return -1;
case ENCODING_IB:
if (readImmediate(insn, 1))
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
return -1;
break;
case ENCODING_Iv:
- readImmediate(insn, insn->immediateSize);
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
break;
case ENCODING_Ia:
- readImmediate(insn, insn->addressSize);
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
break;
case ENCODING_RB:
- readOpcodeRegister(insn, 1);
+ if (readOpcodeRegister(insn, 1))
+ return -1;
break;
case ENCODING_RW:
- readOpcodeRegister(insn, 2);
+ if (readOpcodeRegister(insn, 2))
+ return -1;
break;
case ENCODING_RD:
- readOpcodeRegister(insn, 4);
+ if (readOpcodeRegister(insn, 4))
+ return -1;
break;
case ENCODING_RO:
- readOpcodeRegister(insn, 8);
+ if (readOpcodeRegister(insn, 8))
+ return -1;
break;
case ENCODING_Rv:
- readOpcodeRegister(insn, 0);
+ if (readOpcodeRegister(insn, 0))
+ return -1;
break;
case ENCODING_I:
- readOpcodeModifier(insn);
+ if (readOpcodeModifier(insn))
+ return -1;
+ break;
+ case ENCODING_VVVV:
+ if (readVVVV(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
break;
case ENCODING_DUP:
break;
default:
- dprintf(insn, "Encountered an operand with an unknown encoding.");
+ dbgprintf(insn, "Encountered an operand with an unknown encoding.");
return -1;
}
}
insn->length = insn->readerCursor - insn->startLocation;
- dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
- startLoc, insn->readerCursor, insn->length);
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
if (insn->length > 15)
- dprintf(insn, "Instruction exceeds 15-byte limit");
+ dbgprintf(insn, "Instruction exceeds 15-byte limit");
return 0;
}