*
*===----------------------------------------------------------------------===*/
-#include <assert.h> /* for assert() */
#include <stdarg.h> /* for va_*() */
#include <stdio.h> /* for vsnprintf() */
#include <stdlib.h> /* for exit() */
#define TRUE 1
#define FALSE 0
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
+typedef int8_t bool;
+
+#ifndef NDEBUG
+#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
-#define NORETURN
+#define debug(s) do { } while (0)
#endif
-#define unreachable(s) \
- do { \
- fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s); \
- exit(-1); \
- } while (0);
/*
* contextForAttrs - Client for the instruction context table. Takes a set of
* @return - The InstructionContext to use when looking up an
* instruction with these attributes.
*/
-static inline InstructionContext contextForAttrs(uint8_t attrMask) {
+static InstructionContext contextForAttrs(uint8_t attrMask) { /* Indexes CONTEXTS_SYM directly with attrMask; no range check is made here. */
return CONTEXTS_SYM[attrMask];
}
* ModR/M extensions and escapes.
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
*/
-static inline int modRMRequired(OpcodeType type,
+static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
const struct ContextDecision* decision = 0;
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
- unreachable("Unknown opcode type");
return 0;
}
* @param insnContext - See modRMRequired().
* @param opcode - See modRMRequired().
* @param modRM - The ModR/M byte if required, or any value if not.
+ * @return - The UID of the instruction, or 0 on failure.
*/
-static inline InstrUID decode(OpcodeType type,
- InstructionContext insnContext,
- uint8_t opcode,
- uint8_t modRM) {
- struct ModRMDecision* dec;
+static InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ const struct ModRMDecision* dec;
switch (type) {
default:
- unreachable("Unknown opcode type");
+ debug("Unknown opcode type");
+ return 0;
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
switch (dec->modrm_type) {
default:
- unreachable("Corrupt table! Unknown modrm_type");
+ debug("Corrupt table! Unknown modrm_type");
+ return 0;
case MODRM_ONEENTRY:
return dec->instructionIDs[0];
case MODRM_SPLITRM:
case MODRM_FULL:
return dec->instructionIDs[modRM];
}
-
- return 0;
}
/*
* decode(); specifierForUID will not check bounds.
* @return - A pointer to the specification for that instruction.
*/
-static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { /* Returns the address of INSTRUCTIONS_SYM[uid]; the result is const-qualified, so callers need a pointer-to-const. */
return &INSTRUCTIONS_SYM[uid];
}
* with the data read.
* @return - 0 if the read was successful; nonzero otherwise.
*/
-static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
if (!ret)
* @param byte - See consumeByte().
* @return - See consumeByte().
*/
-static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { /* Reads the byte at insn->readerCursor through insn->reader without moving the cursor; returns the reader's status. */
return insn->reader(insn->readerArg, byte, insn->readerCursor);
}
-static inline void unconsumeByte(struct InternalInstruction* insn) {
+static void unconsumeByte(struct InternalInstruction* insn) { /* Steps insn->readerCursor back by one; no lower-bound check is made. */
insn->readerCursor--;
}
-#define CONSUME_FUNC(name, type) \
- static inline int name(struct InternalInstruction* insn, type* ptr) { \
- type combined = 0; \
- unsigned offset; \
- for (offset = 0; offset < sizeof(type); ++offset) { \
- uint8_t byte; \
- int ret = insn->reader(insn->readerArg, \
- &byte, \
- insn->readerCursor + offset); \
- if (ret) \
- return ret; \
- combined = combined | ((type)byte << ((type)offset * 8)); \
- } \
- *ptr = combined; \
- insn->readerCursor += sizeof(type); \
- return 0; \
+#define CONSUME_FUNC(name, type) /* defines function 'name': reads sizeof(type) bytes via insn->reader into *ptr */ \
+ static int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) /* any nonzero reader status aborts; *ptr and the cursor are left untouched */ \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); /* the byte at the lowest offset lands in the least significant bits */ \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); /* the cursor advances only after every byte was read successfully */ \
+ return 0; \
}
/*
* @param format - See printf().
* @param ... - See printf().
*/
-static inline void dbgprintf(struct InternalInstruction* insn,
- const char* format,
- ...) {
+static void dbgprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
char buffer[256];
va_list ap;
* @param location - The location where the prefix is located (in the address
* space of the instruction's reader).
*/
-static inline void setPrefixPresent(struct InternalInstruction* insn,
+static void setPrefixPresent(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
* @param location - The location to query.
* @return - Whether the prefix is at that location.
*/
-static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
- uint8_t prefix,
- uint64_t location)
+static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
{
if (insn->prefixPresent[prefix] == 1 &&
insn->prefixLocations[prefix] == location)
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
- uint8_t byte;
+ uint8_t byte = 0;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
insn->segmentOverride = SEG_OVERRIDE_GS;
break;
default:
- unreachable("Unhandled override");
+ debug("Unhandled override");
+ return -1;
}
if (prefixGroups[1])
dbgprintf(insn, "Redundant Group 2 prefix");
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
- if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 2 : 4);
insn->displacementSize = (hasAdSize ? 2 : 4);
- insn->immediateSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
insn->opcode);
if (hasModRMExtension) {
- readModRM(insn);
+ if (readModRM(insn))
+ return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
off_t i;
- for(i = 0;; i++) {
- if(orig[i] == '\0' && equiv[i] == '\0')
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
- if(orig[i] == '\0' || equiv[i] == '\0')
+ if (orig[i] == '\0' || equiv[i] == '\0')
return FALSE;
- if(orig[i] != equiv[i]) {
- if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
continue;
- if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
continue;
- if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
continue;
return FALSE;
}
static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
off_t i;
- for(i = 0;; i++) {
- if(orig[i] == '\0' && equiv[i] == '\0')
+ for (i = 0;; i++) {
+ if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
- if(orig[i] == '\0' || equiv[i] == '\0')
+ if (orig[i] == '\0' || equiv[i] == '\0')
return FALSE;
- if(orig[i] != equiv[i]) {
- if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+ if (orig[i] != equiv[i]) {
+ if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
continue;
- if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+ if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
continue;
- if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+ if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
continue;
return FALSE;
}
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
- if(getIDWithAttrMask(&instructionID, insn, attrMask))
+ if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
/* The following clauses compensate for limitations of the tables. */
* instead of F2 changes a 32 to a 64, we adopt the new encoding.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithREXw;
- struct InstructionSpecifier* specWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
spec = specifierForUID(instructionID);
* in the right place we check if there's a 16-bit operation.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- struct InstructionSpecifier* specWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
spec = specifierForUID(instructionID);
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x3:
- unreachable("Cannot have Mod = 0b11 and a SIB byte");
+ debug("Cannot have Mod = 0b11 and a SIB byte");
+ return -1;
}
break;
default:
if (insn->consumedModRM)
return 0;
- consumeByte(insn, &insn->modRM);
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
*/
switch (insn->registerSize) {
case 2:
- insn->regBase = REG_AX;
+ insn->regBase = MODRM_REG_AX;
insn->eaRegBase = EA_REG_AX;
break;
case 4:
- insn->regBase = REG_EAX;
+ insn->regBase = MODRM_REG_EAX;
insn->eaRegBase = EA_REG_EAX;
break;
case 8:
- insn->regBase = REG_RAX;
+ insn->regBase = MODRM_REG_RAX;
insn->eaRegBase = EA_REG_RAX;
break;
}
if (rm == 0x6) {
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_16;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
} else {
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
case 0x1:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_8;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x2:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
insn->eaDisplacement = EA_DISP_16;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x3:
insn->eaBase = (EABase)(insn->eaRegBase + rm);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
}
insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
case 0x5:
insn->eaBase = EA_BASE_NONE;
insn->eaDisplacement = EA_DISP_32;
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
default:
case 0xc: /* in case REXW.b is set */
insn->eaBase = EA_BASE_sib;
readSIB(insn);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
default:
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
- if(readDisplacement(insn))
+ if (readDisplacement(insn))
return -1;
break;
}
*valid = 1; \
switch (type) { \
default: \
- unreachable("Unhandled register type"); \
+ debug("Unhandled register type"); \
+ *valid = 0; \
+ return 0; \
case TYPE_Rv: \
return base + index; \
case TYPE_R8: \
- if(insn->rexPrefix && \
+ if (insn->rexPrefix && \
index >= 4 && index <= 7) { \
return prefix##_SPL + (index - 4); \
} else { \
case TYPE_MM64: \
case TYPE_MM32: \
case TYPE_MM: \
- if(index > 7) \
+ if (index > 7) \
*valid = 0; \
return prefix##_MM0 + index; \
case TYPE_SEGMENTREG: \
- if(index > 5) \
+ if (index > 5) \
*valid = 0; \
return prefix##_ES + index; \
case TYPE_DEBUGREG: \
- if(index > 7) \
+ if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
- case TYPE_CR32: \
- if(index > 7) \
+ case TYPE_CONTROLREG: \
+ if (index > 8) \
*valid = 0; \
- return prefix##_ECR0 + index; \
- case TYPE_CR64: \
- if(index > 8) \
- *valid = 0; \
- return prefix##_RCR0 + index; \
+ return prefix##_CR0 + index; \
} \
}
* @param index - The existing value of the field as reported by readModRM().
* @param valid - The address of a uint8_t. The target is set to 1 if the
* field is valid for the register class; 0 if not.
+ * @return - The proper value.
*/
-GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, REG)
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
/*
* invalid for its class.
*/
static int fixupReg(struct InternalInstruction *insn,
- struct OperandSpecifier *op) {
+ const struct OperandSpecifier *op) {
uint8_t valid;
dbgprintf(insn, "fixupReg()");
switch ((OperandEncoding)op->encoding) {
default:
- unreachable("Expected a REG or R/M encoding in fixupReg");
+ debug("Expected a REG or R/M encoding in fixupReg");
+ return -1;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
* @param insn - The instruction whose opcode field is to be read.
* @param inModRM - Indicates that the opcode field is to be read from the
* ModR/M extension; useful for escape opcodes
+ * @return - 0 on success; nonzero otherwise.
*/
-static void readOpcodeModifier(struct InternalInstruction* insn) {
+static int readOpcodeModifier(struct InternalInstruction* insn) { /* Computes insn->opcodeModifier from the opcode or ModR/M byte; returns 0 on success, -1 on an unusable modifier type. */
dbgprintf(insn, "readOpcodeModifier()");
if (insn->consumedOpcodeModifier)
- return;
+ return 0; /* the modifier was already handled by an earlier call */
insn->consumedOpcodeModifier = TRUE;
- switch(insn->spec->modifierType) {
+ switch (insn->spec->modifierType) {
default:
- unreachable("Unknown modifier type.");
+ debug("Unknown modifier type.");
+ return -1; /* NOTE(review): consumedOpcodeModifier is already TRUE here, so a later call would return 0 without retrying */
case MODIFIER_NONE:
- unreachable("No modifier but an operand expects one.");
+ debug("No modifier but an operand expects one.");
+ return -1;
case MODIFIER_OPCODE:
insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
- break;
+ return 0; /* modifier is the opcode byte minus the spec's modifierBase */
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
- break;
+ return 0; /* modifier is the ModR/M byte minus the spec's modifierBase */
}
}
* @param size - The width (in bytes) of the register being specified.
* 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
* RAX.
+ * @return - 0 on success; nonzero otherwise.
*/
-static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { /* Sets insn->opcodeRegister from the opcode modifier and REX.b; returns 0 on success, -1 if the modifier cannot be read or size is unsupported. */
dbgprintf(insn, "readOpcodeRegister()");
- readOpcodeModifier(insn);
+ if (readOpcodeModifier(insn)) /* propagate a modifier failure instead of ignoring it */
+ return -1;
if (size == 0)
size = insn->registerSize;
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
- if(insn->rexPrefix &&
- insn->opcodeRegister >= REG_AL + 0x4 &&
- insn->opcodeRegister < REG_AL + 0x8) {
- insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4));
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if (insn->rexPrefix && /* with a REX prefix present, indices 4-7 are remapped onto the MODRM_REG_SPL group */
+ insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
+ insn->opcodeRegister < MODRM_REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ + (insn->opcodeRegister - MODRM_REG_AL - 4));
}
break;
case 2:
- insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_AX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
case 4:
- insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3)
- | insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_EAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
case 8:
- insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3)
- |insn->opcodeModifier));
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
break;
+ default: /* only widths 1, 2, 4 and 8 are handled above; anything else would leave opcodeRegister unset */
+ debug("Unhandled register size");
+ return -1;
}
+
+ return 0;
}
/*
dbgprintf(insn, "readImmediate()");
- if (insn->numImmediatesConsumed == 2)
- unreachable("Already consumed two immediates");
+ if (insn->numImmediatesConsumed == 2) {
+ debug("Already consumed two immediates");
+ return -1;
+ }
if (size == 0)
size = insn->immediateSize;
case ENCODING_IB:
if (readImmediate(insn, 1))
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+ return -1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
return -1;
break;
case ENCODING_Iv:
- readImmediate(insn, insn->immediateSize);
+ if (readImmediate(insn, insn->immediateSize))
+ return -1;
break;
case ENCODING_Ia:
- readImmediate(insn, insn->addressSize);
+ if (readImmediate(insn, insn->addressSize))
+ return -1;
break;
case ENCODING_RB:
- readOpcodeRegister(insn, 1);
+ if (readOpcodeRegister(insn, 1))
+ return -1;
break;
case ENCODING_RW:
- readOpcodeRegister(insn, 2);
+ if (readOpcodeRegister(insn, 2))
+ return -1;
break;
case ENCODING_RD:
- readOpcodeRegister(insn, 4);
+ if (readOpcodeRegister(insn, 4))
+ return -1;
break;
case ENCODING_RO:
- readOpcodeRegister(insn, 8);
+ if (readOpcodeRegister(insn, 8))
+ return -1;
break;
case ENCODING_Rv:
- readOpcodeRegister(insn, 0);
+ if (readOpcodeRegister(insn, 0))
+ return -1;
break;
case ENCODING_I:
- readOpcodeModifier(insn);
- break;
+ if (readOpcodeModifier(insn))
+ return -1;
case ENCODING_DUP:
break;
default:
insn->length = insn->readerCursor - insn->startLocation;
- dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
- startLoc, insn->readerCursor, insn->length);
+ dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
+ startLoc, insn->readerCursor, insn->length);
if (insn->length > 15)
dbgprintf(insn, "Instruction exceeds 15-byte limit");