/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AL/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
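/*
 * Reader aid (an editorial sketch, not emulator code): each operand slot
 * in the decode flags word is an OpBits-wide field, so pulling one
 * operand type out of a table entry is a shift plus a mask:
 *
 *	u64 optype = (flags >> shift) & OpMask;
 *
 * where shift selects the destination, source or source-2 field (see
 * DstShift, SrcShift and Src2Shift below).
 */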
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)      /* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)     /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
#define Prot        (1<<21)     /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22)     /* Emulate if unsupported by the host */
#define NoAccess    (1<<23)     /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24)     /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25)     /* No Such Instruction */
#define Lock        (1<<26)     /* lock prefix is allowed for the instruction */
#define Priv        (1<<27)     /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)     /* instruction is invalid in 64-bit mode */
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16        ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define Aligned16   ((u64)1 << 55)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
#define X2(x...)  x, x
#define X3(x...)  X2(x), x
#define X4(x...)  X2(x), X2(x)
#define X5(x...)  X4(x), x
#define X6(x...)  X4(x), X2(x)
#define X7(x...)  X4(x), X3(x)
#define X8(x...)  X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
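/*
 * Reader aid: the X macros simply repeat their argument, so a table line
 * such as X16(D(DstReg | SrcMem | ModRM)) expands to sixteen identical
 * entries.  (D() is one of the table-entry constructors defined later in
 * this file, outside this excerpt.)
 */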
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */
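/*
 * Editorial sketch of the size dispatch described above (the real
 * fastop() lives further down in this file): because every size variant
 * is exactly FASTOP_SIZE bytes, the handler for the current operand size
 * is reached by offsetting into the block instead of via a jump table:
 *
 *	void (*fop)(struct fastop *) =
 *		(void *)em_add + FASTOP_SIZE * ilog2(ctxt->dst.bytes);
 *
 * so bytes == 1 selects the first stub, bytes == 2 the second, and so
 * on.  (em_add comes from an elided FASTOP2(add) invocation.)
 */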
struct fastop;

struct opcode {
        u64 flags : 56;
        u64 intercept : 8;
        union {
                int (*execute)(struct x86_emulate_ctxt *ctxt);
                const struct opcode *group;
                const struct group_dual *gdual;
                const struct gprefix *gprefix;
                const struct escape *esc;
                const struct instr_dual *idual;
                const struct mode_dual *mdual;
                void (*fastop)(struct fastop *fake);
        } u;
        int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
        struct opcode mod012[8];
        struct opcode mod3[8];
};

struct gprefix {
        struct opcode pfx_no;
        struct opcode pfx_66;
        struct opcode pfx_f2;
        struct opcode pfx_f3;
};

struct escape {
        struct opcode op[8];
        struct opcode high[64];
};

struct instr_dual {
        struct opcode mod012;
        struct opcode mod3;
};

struct mode_dual {
        struct opcode mode32;
        struct opcode mode64;
};

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
        X86_TRANSFER_NONE,
        X86_TRANSFER_CALL_JMP,
        X86_TRANSFER_RET,
        X86_TRANSFER_TASK_SWITCH,
};
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        if (!(ctxt->regs_valid & (1 << nr))) {
                ctxt->regs_valid |= 1 << nr;
                ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
        }
        return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        ctxt->regs_valid |= 1 << nr;
        ctxt->regs_dirty |= 1 << nr;
        return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        reg_read(ctxt, nr);
        return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
        unsigned reg;

        for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
                ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
        ctxt->regs_dirty = 0;
        ctxt->regs_valid = 0;
}

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
                     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
        extern void em_##op(struct fastop *fake); \
        asm(".pushsection .text, \"ax\" \n\t" \
            ".global em_" #op " \n\t" \
            FOP_ALIGN \
            "em_" #op ": \n\t"

#define FOP_END \
            ".popsection")

#define FOPNOP() FOP_ALIGN FOP_RET

#define FOP1E(op, dst) \
        FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET

#define FOP1EEX(op, dst) \
        FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
        FOP_START(op) \
        FOP1E(op##b, al) \
        FOP1E(op##w, ax) \
        FOP1E(op##l, eax) \
        ON64(FOP1E(op##q, rax)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
        FOP_START(name) \
        FOP1E(op, cl) \
        FOP1E(op, cx) \
        FOP1E(op, ecx) \
        ON64(FOP1E(op, rcx)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
        FOP_START(name) \
        FOP1EEX(op, cl) \
        FOP1EEX(op, cx) \
        FOP1EEX(op, ecx) \
        ON64(FOP1EEX(op, rcx)) \
        FOP_END
#define FOP2E(op, dst, src) \
        FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
        FOP_START(op) \
        FOP2E(op##b, al, dl) \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
        FOP_START(op) \
        FOP2E(op##b, al, cl) \
        FOP2E(op##w, ax, cl) \
        FOP2E(op##l, eax, cl) \
        ON64(FOP2E(op##q, rax, cl)) \
        FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
        FOP_START(name) \
        FOP2E(op##b, dl, al) \
        FOP2E(op##w, dx, ax) \
        FOP2E(op##l, edx, eax) \
        ON64(FOP2E(op##q, rdx, rax)) \
        FOP_END

#define FOP3E(op, dst, src, src2) \
        FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP3E(op##w, ax, dx, cl) \
        FOP3E(op##l, eax, edx, cl) \
        ON64(FOP3E(op##q, rax, rdx, cl)) \
        FOP_END
/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"

asm(".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; ret");
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;

/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
        int _fault = 0; \
 \
        asm volatile("1:" insn "\n" \
                     "2:\n" \
                     ".pushsection .fixup, \"ax\"\n" \
                     "3: movl $1, %[_fault]\n" \
                     "   jmp  2b\n" \
                     ".popsection\n" \
                     _ASM_EXTABLE(1b, 3b) \
                     : [_fault] "+qm"(_fault) inoutclob ); \
 \
        _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})
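/*
 * Usage sketch: asm_safe() wraps a single instruction so that a fault
 * becomes an emulation error rather than a host oops, e.g. (as the FPU
 * helpers elsewhere in this file do):
 *
 *	rc = asm_safe("fwait");
 *
 * which evaluates to X86EMUL_UNHANDLEABLE if the FWAIT faulted.
 */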
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
                                    enum x86_intercept intercept,
                                    enum x86_intercept_stage stage)
{
        struct x86_instruction_info info = {
                .intercept  = intercept,
                .rep_prefix = ctxt->rep_prefix,
                .modrm_mod  = ctxt->modrm_mod,
                .modrm_reg  = ctxt->modrm_reg,
                .modrm_rm   = ctxt->modrm_rm,
                .src_val    = ctxt->src.val64,
                .dst_val    = ctxt->dst.val64,
                .src_bytes  = ctxt->src.bytes,
                .dst_bytes  = ctxt->dst.bytes,
                .ad_bytes   = ctxt->ad_bytes,
                .next_rip   = ctxt->eip,
        };

        return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
        *dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
        switch (bytes) {
        case 1:
                *(u8 *)reg = (u8)val;
                break;
        case 2:
                *(u16 *)reg = (u16)val;
                break;
        case 4:
                *reg = (u32)val;
                break;  /* 64b: zero-extend */
        case 8:
                *reg = val;
                break;
        }
}
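/*
 * Worked example: writing 0x12345678 with bytes == 4 replaces the whole
 * 64-bit register (the architectural zero-extension of 32-bit writes),
 * whereas bytes == 2 merges only the low 16 bits and leaves bits 16-63
 * untouched.
 */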
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
        return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
        u16 sel;
        struct desc_struct ss;

        if (ctxt->mode == X86EMUL_MODE_PROT64)
                return ~0UL;
        ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
        return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
        return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
        if (ctxt->ad_bytes == sizeof(unsigned long))
                return reg;
        else
                return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
        return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
        assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
        ulong *preg = reg_rmw(ctxt, reg);

        assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
        masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
        u32 limit = get_desc_limit(desc);

        return desc->g ? (limit << 12) | 0xfff : limit;
}
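/*
 * Worked example: with the granularity bit set, a raw limit of 0xffff
 * scales to (0xffff << 12) | 0xfff = 0xfffffff, i.e. a 256 MiB segment
 * counted in 4 KiB pages; with g clear the raw byte limit is used as-is.
 */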
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
                return 0;

        return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
                             u32 error, bool valid)
{
        ctxt->exception.vector = vec;
        ctxt->exception.error_code = error;
        ctxt->exception.error_code_valid = valid;
        return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
        u16 selector;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
        return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
                                 unsigned seg)
{
        u16 dummy;
        u32 base3;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
        ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
        if (likely(size < 16))
                return 1;

        if (ctxt->d & Aligned)
                return size;
        else if (ctxt->d & Unaligned)
                return 1;
        else if (ctxt->d & Avx)
                return 1;
        else if (ctxt->d & Aligned16)
                return 16;
        else
                return size;
}
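/*
 * Worked example: a 16-byte MOVDQA (flagged Aligned) requires a 16-byte
 * aligned address, while MOVDQU (Unaligned) never faults on alignment;
 * __linearize() below applies the result as
 * "la & (insn_alignment(ctxt, size) - 1)".
 */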
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                                       struct segmented_address addr,
                                       unsigned *max_size, unsigned size,
                                       bool write, bool fetch,
                                       enum x86emul_mode mode, ulong *linear)
{
        struct desc_struct desc;
        bool usable;
        ulong la;
        u32 lim;
        u16 sel;

        la = seg_base(ctxt, addr.seg) + addr.ea;
        *max_size = 0;
        switch (mode) {
        case X86EMUL_MODE_PROT64:
                *linear = la;
                if (is_noncanonical_address(la))
                        goto bad;

                *max_size = min_t(u64, ~0u, (1ull << 48) - la);
                if (size > *max_size)
                        goto bad;
                break;
        default:
                *linear = la = (u32)la;
                usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
                                                addr.seg);
                if (!usable)
                        goto bad;
                /* code segment in protected mode or read-only data segment */
                if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
                                        || !(desc.type & 2)) && write)
                        goto bad;
                /* unreadable code segment */
                if (!fetch && (desc.type & 8) && !(desc.type & 2))
                        goto bad;
                lim = desc_limit_scaled(&desc);
                if (!(desc.type & 8) && (desc.type & 4)) {
                        /* expand-down segment */
                        if (addr.ea <= lim)
                                goto bad;
                        lim = desc.d ? 0xffffffff : 0xffff;
                }
                if (addr.ea > lim)
                        goto bad;
                if (lim == 0xffffffff)
                        *max_size = ~0u;
                else {
                        *max_size = (u64)lim + 1 - addr.ea;
                        if (size > *max_size)
                                goto bad;
                }
                break;
        }
        if (la & (insn_alignment(ctxt, size) - 1))
                return emulate_gp(ctxt, 0);
        return X86EMUL_CONTINUE;
bad:
        if (addr.seg == VCPU_SREG_SS)
                return emulate_ss(ctxt, 0);
        else
                return emulate_gp(ctxt, 0);
}
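/*
 * Reader aid: a segmented address becomes a linear one by adding the
 * (possibly zero) segment base and then applying the mode-specific
 * checks above, typically via the linearize() wrapper below:
 *
 *	struct segmented_address addr = { .seg = VCPU_SREG_DS, .ea = ofs };
 *	rc = linearize(ctxt, addr, size, false, &linear);
 *
 * In 64-bit mode only canonicality is checked; otherwise segment limit,
 * type and direction are honoured.
 */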
static int linearize(struct x86_emulate_ctxt *ctxt,
                     struct segmented_address addr,
                     unsigned size, bool write,
                     ulong *linear)
{
        unsigned max_size;

        return __linearize(ctxt, addr, &max_size, size, write, false,
                           ctxt->mode, linear);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
                             enum x86emul_mode mode)
{
        ulong linear;
        int rc;
        unsigned max_size;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                          .ea = dst };

        if (ctxt->op_bytes != sizeof(unsigned long))
                addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
        rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
        if (rc == X86EMUL_CONTINUE)
                ctxt->_eip = addr.ea;
        return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
        return assign_eip(ctxt, dst, ctxt->mode);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
                          const struct desc_struct *cs_desc)
{
        enum x86emul_mode mode = ctxt->mode;
        int rc;

#ifdef CONFIG_X86_64
        if (ctxt->mode >= X86EMUL_MODE_PROT16) {
                if (cs_desc->l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                mode = X86EMUL_MODE_PROT64;
                } else
                        mode = X86EMUL_MODE_PROT32; /* temporary value */
        }
#endif
        if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
                mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
        rc = assign_eip(ctxt, dst, mode);
        if (rc == X86EMUL_CONTINUE)
                ctxt->mode = mode;
        return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
        return assign_eip_near(ctxt, ctxt->_eip + rel);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
                              struct segmented_address addr,
                              void *data,
                              unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
                               struct segmented_address addr,
                               void *data,
                               unsigned int size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
}
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
        int rc;
        unsigned size, max_size;
        unsigned long linear;
        int cur_size = ctxt->fetch.end - ctxt->fetch.data;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                          .ea = ctxt->eip + cur_size };

        /*
         * We do not know exactly how many bytes will be needed, and
         * __linearize is expensive, so fetch as much as possible.  We
         * just have to avoid going beyond the 15 byte limit, the end
         * of the segment, or the end of the page.
         *
         * __linearize is called with size 0 so that it does not do any
         * boundary check itself.  Instead, we use max_size to check
         * against op_size.
         */
        rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
                         &linear);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;

        size = min_t(unsigned, 15UL ^ cur_size, max_size);
        size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

        /*
         * One instruction can only straddle two pages,
         * and one has been loaded at the beginning of
         * x86_decode_insn.  So, if not enough bytes
         * still, we must have hit the 15-byte boundary.
         */
        if (unlikely(size < op_size))
                return emulate_gp(ctxt, 0);

        rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
                              size, &ctxt->exception);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;
        ctxt->fetch.end += size;
        return X86EMUL_CONTINUE;
}

static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
                                               unsigned size)
{
        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

        if (unlikely(done_size < size))
                return __do_insn_fetch_bytes(ctxt, size - done_size);
        else
                return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt) \
({      _type _x; \
 \
        rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
        if (rc != X86EMUL_CONTINUE) \
                goto done; \
        ctxt->_eip += sizeof(_type); \
        _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
        ctxt->fetch.ptr += sizeof(_type); \
        _x; \
})

#define insn_fetch_arr(_arr, _size, _ctxt) \
({ \
        rc = do_insn_fetch_bytes(_ctxt, _size); \
        if (rc != X86EMUL_CONTINUE) \
                goto done; \
        ctxt->_eip += (_size); \
        memcpy(_arr, ctxt->fetch.ptr, _size); \
        ctxt->fetch.ptr += (_size); \
})
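/*
 * Usage sketch: the decoder pulls bytes off the instruction stream with
 * these macros, relying on a local 'rc' and a 'done' label in the
 * caller, e.g.:
 *
 *	ctxt->b = insn_fetch(u8, ctxt);		opcode byte
 *	ctxt->modrm = insn_fetch(u8, ctxt);	then the ModRM byte
 *
 * Each fetch advances both _eip and the fetch-cache pointer.
 */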
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
                             int byteop)
{
        void *p;
        int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
        else
                p = reg_rmw(ctxt, modrm_reg);
        return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           u16 *size, unsigned long *address, int op_bytes)
{
        int rc;

        if (op_bytes == 2)
                op_bytes = 3;
        *address = 0;
        rc = segmented_read_std(ctxt, addr, size, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        addr.ea += 2;
        rc = segmented_read_std(ctxt, addr, address, op_bytes);
        return rc;
}

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP2R(cmp, cmp_r);

static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsr);
}
static u8 test_cc(unsigned int condition, unsigned long flags)
{
        u8 rc;
        void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

        flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
        asm("push %[flags]; popf; call *%[fastop]"
            : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
        return rc;
}
static void fetch_register_operand(struct operand *op)
{
        switch (op->bytes) {
        case 1:
                op->val = *(u8 *)op->addr.reg;
                break;
        case 2:
                op->val = *(u16 *)op->addr.reg;
                break;
        case 4:
                op->val = *(u32 *)op->addr.reg;
                break;
        case 8:
                op->val = *(u64 *)op->addr.reg;
                break;
        }
}

static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
        case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
        case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
        case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
        case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
        case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
        case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
        case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
        case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
        case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
        case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
        case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
        case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
        case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
        case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
        case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
                          int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
        case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
        case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
        case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
        case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
        case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
        case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
        case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
        case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
        case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
        case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
        case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
        case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
        case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
        case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
        case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}

static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
        case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
        case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
        case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
        case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
        case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
        case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
        case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
        case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
        case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
        case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
        case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
        case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
        case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
        case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fninit");
        ctxt->ops->put_fpu(ctxt);
        return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
        u16 fcw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fnstcw %0": "+m"(fcw));
        ctxt->ops->put_fpu(ctxt);

        ctxt->dst.val = fcw;

        return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
        u16 fsw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fnstsw %0": "+m"(fsw));
        ctxt->ops->put_fpu(ctxt);

        ctxt->dst.val = fsw;

        return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
                                    struct operand *op)
{
        unsigned reg = ctxt->modrm_reg;

        if (!(ctxt->d & ModRM))
                reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

        if (ctxt->d & Sse) {
                op->type = OP_XMM;
                op->bytes = 16;
                op->addr.xmm = reg;
                read_sse_reg(ctxt, &op->vec_val, reg);
                return;
        }
        if (ctxt->d & Mmx) {
                reg &= 7;
                op->type = OP_MM;
                op->bytes = 8;
                op->addr.mm = reg;
                return;
        }

        op->type = OP_REG;
        op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
        op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

        fetch_register_operand(op);
        op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
        if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
                ctxt->modrm_seg = VCPU_SREG_SS;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        struct operand *op)
{
        u8 sib;
        int index_reg, base_reg, scale;
        int rc = X86EMUL_CONTINUE;
        ulong modrm_ea = 0;

        ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
        index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
        base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

        ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
        ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
        ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
        ctxt->modrm_seg = VCPU_SREG_DS;

        if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
                                ctxt->d & ByteOp);
                if (ctxt->d & Sse) {
                        op->type = OP_XMM;
                        op->bytes = 16;
                        op->addr.xmm = ctxt->modrm_rm;
                        read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
                        return rc;
                }
                if (ctxt->d & Mmx) {
                        op->type = OP_MM;
                        op->bytes = 8;
                        op->addr.mm = ctxt->modrm_rm & 7;
                        return rc;
                }
                fetch_register_operand(op);
                return rc;
        }

        op->type = OP_MEM;

        if (ctxt->ad_bytes == 2) {
                unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
                unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
                unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
                unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

                /* 16-bit ModR/M decode. */
                switch (ctxt->modrm_mod) {
                case 0:
                        if (ctxt->modrm_rm == 6)
                                modrm_ea += insn_fetch(u16, ctxt);
                        break;
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(u16, ctxt);
                        break;
                }
                switch (ctxt->modrm_rm) {
                case 0:
                        modrm_ea += bx + si;
                        break;
                case 1:
                        modrm_ea += bx + di;
                        break;
                case 2:
                        modrm_ea += bp + si;
                        break;
                case 3:
                        modrm_ea += bp + di;
                        break;
                case 4:
                        modrm_ea += si;
                        break;
                case 5:
                        modrm_ea += di;
                        break;
                case 6:
                        if (ctxt->modrm_mod != 0)
                                modrm_ea += bp;
                        break;
                case 7:
                        modrm_ea += bx;
                        break;
                }
                if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
                    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
                        ctxt->modrm_seg = VCPU_SREG_SS;
                modrm_ea = (u16)modrm_ea;
        } else {
                /* 32/64-bit ModR/M decode. */
                if ((ctxt->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, ctxt);
                        index_reg |= (sib >> 3) & 7;
                        base_reg |= sib & 7;
                        scale = sib >> 6;

                        if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
                                modrm_ea += insn_fetch(s32, ctxt);
                        else {
                                modrm_ea += reg_read(ctxt, base_reg);
                                adjust_modrm_seg(ctxt, base_reg);
                                /* Increment ESP on POP [ESP] */
                                if ((ctxt->d & IncSP) &&
                                    base_reg == VCPU_REGS_RSP)
                                        modrm_ea += ctxt->op_bytes;
                        }
                        if (index_reg != 4)
                                modrm_ea += reg_read(ctxt, index_reg) << scale;
                } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
                        modrm_ea += insn_fetch(s32, ctxt);
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                ctxt->rip_relative = 1;
                } else {
                        base_reg = ctxt->modrm_rm;
                        modrm_ea += reg_read(ctxt, base_reg);
                        adjust_modrm_seg(ctxt, base_reg);
                }
                switch (ctxt->modrm_mod) {
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(s32, ctxt);
                        break;
                }
        }
        op->addr.mem.ea = modrm_ea;
        if (ctxt->ad_bytes != 8)
                ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
        return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
                      struct operand *op)
{
        int rc = X86EMUL_CONTINUE;

        op->type = OP_MEM;
        switch (ctxt->ad_bytes) {
        case 2:
                op->addr.mem.ea = insn_fetch(u16, ctxt);
                break;
        case 4:
                op->addr.mem.ea = insn_fetch(u32, ctxt);
                break;
        case 8:
                op->addr.mem.ea = insn_fetch(u64, ctxt);
                break;
        }
done:
        return rc;
}

static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
        long sv = 0, mask;

        if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
                mask = ~((long)ctxt->dst.bytes * 8 - 1);

                if (ctxt->src.bytes == 2)
                        sv = (s16)ctxt->src.val & (s16)mask;
                else if (ctxt->src.bytes == 4)
                        sv = (s32)ctxt->src.val & (s32)mask;
                else
                        sv = (s64)ctxt->src.val & (s64)mask;

                ctxt->dst.addr.mem.ea = address_mask(ctxt,
                                        ctxt->dst.addr.mem.ea + (sv >> 3));
        }

        /* only subword offset */
        ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}

static int read_emulated(struct x86_emulate_ctxt *ctxt,
                         unsigned long addr, void *dest, unsigned size)
{
        int rc;
        struct read_cache *mc = &ctxt->mem_read;

        if (mc->pos < mc->end)
                goto read_cached;

        WARN_ON((mc->end + size) >= sizeof(mc->data));

        rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
                                      &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        mc->end += size;

read_cached:
        memcpy(dest, mc->data + mc->pos, size);
        mc->pos += size;
        return X86EMUL_CONTINUE;
}
static int segmented_read(struct x86_emulate_ctxt *ctxt,
                          struct segmented_address addr,
                          void *data,
                          unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           const void *data,
                           unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->write_emulated(ctxt, linear, data, size,
                                         &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
                             struct segmented_address addr,
                             const void *orig_data, const void *data,
                             unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
                                           size, &ctxt->exception);
}

static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                           unsigned int size, unsigned short port,
                           void *dest)
{
        struct read_cache *rc = &ctxt->io_read;

        if (rc->pos == rc->end) { /* refill pio read ahead */
                unsigned int in_page, n;
                unsigned int count = ctxt->rep_prefix ?
                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
                in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
                if (n == 0)
                        n = 1;
                rc->pos = rc->end = 0;
                if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
                        return 0;
                rc->end = n * size;
        }

        if (ctxt->rep_prefix && (ctxt->d & String) &&
            !(ctxt->eflags & X86_EFLAGS_DF)) {
                ctxt->dst.data = rc->data + rc->pos;
                ctxt->dst.type = OP_MEM_STR;
                ctxt->dst.count = (rc->end - rc->pos) / size;
                rc->pos = rc->end;
        } else {
                memcpy(dest, rc->data + rc->pos, size);
                rc->pos += size;
        }
        return 1;
}

static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 index, struct desc_struct *desc)
{
        struct desc_ptr dt;
        ulong addr;

        ctxt->ops->get_idt(ctxt, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, index << 3 | 0x2);

        addr = dt.address + index * 8;
        return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
                                   &ctxt->exception);
}
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, struct desc_ptr *dt)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        u32 base3 = 0;

        if (selector & 1 << 2) {
                struct desc_struct desc;
                u16 sel;

                memset(dt, 0, sizeof *dt);
                if (!ops->get_segment(ctxt, &sel, &desc, &base3,
                                      VCPU_SREG_LDTR))
                        return;

                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
                dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
        } else
                ops->get_gdt(ctxt, dt);
}

static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
                              u16 selector, ulong *desc_addr_p)
{
        struct desc_ptr dt;
        u16 index = selector >> 3;
        ulong addr;

        get_descriptor_table_ptr(ctxt, selector, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, selector & 0xfffc);

        addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
        if (addr >> 32 != 0) {
                u64 efer;

                ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                if (!(efer & EFER_LMA))
                        addr &= (u32)-1;
        }
#endif

        *desc_addr_p = addr;
        return X86EMUL_CONTINUE;
}

/* allowed just for 8 byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, struct desc_struct *desc,
                                   ulong *desc_addr_p)
{
        int rc;

        rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
                                   &ctxt->exception);
}

/* allowed just for 8 byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    u16 selector, struct desc_struct *desc)
{
        int rc;
        ulong addr;

        rc = get_descriptor_ptr(ctxt, selector, &addr);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
                                    &ctxt->exception);
}
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, int seg, u8 cpl,
                                     enum x86_transfer_type transfer,
                                     struct desc_struct *desc)
{
        struct desc_struct seg_desc, old_desc;
        u8 dpl, rpl;
        unsigned err_vec = GP_VECTOR;
        u32 err_code = 0;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
        ulong desc_addr;
        int ret;
        u16 dummy;
        u32 base3 = 0;

        memset(&seg_desc, 0, sizeof seg_desc);

        if (ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor (keep limit etc. for
                 * unreal mode) */
                ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
                set_desc_base(&seg_desc, selector << 4);
                goto load;
        } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
                /* VM86 needs a clean new segment descriptor */
                set_desc_base(&seg_desc, selector << 4);
                set_desc_limit(&seg_desc, 0xffff);
                seg_desc.type = 3;
                seg_desc.p = 1;
                seg_desc.s = 1;
                seg_desc.dpl = 3;
                goto load;
        }

        rpl = selector & 3;

        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;

        /* NULL selector is not valid for TR, CS and (except for long mode) SS */
        if (null_selector) {
                if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
                        goto exception;

                if (seg == VCPU_SREG_SS) {
                        if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
                                goto exception;

                        /*
                         * ctxt->ops->set_segment expects the CPL to be in
                         * SS.DPL, so fake an expand-up 32-bit data segment.
                         */
                        seg_desc.type = 3;
                        seg_desc.p = 1;
                        seg_desc.s = 1;
                        seg_desc.dpl = cpl;
                        seg_desc.d = 1;
                        seg_desc.g = 1;
                }

                /* Skip all following checks */
                goto load;
        }

        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        err_code = selector & 0xfffc;
        err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
                                                           GP_VECTOR;

        /* can't load system descriptor into segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s) {
                if (transfer == X86_TRANSFER_CALL_JMP)
                        return X86EMUL_UNHANDLEABLE;
                goto exception;
        }

        if (!seg_desc.p) {
                err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
                goto exception;
        }

        dpl = seg_desc.dpl;

        switch (seg) {
        case VCPU_SREG_SS:
                /*
                 * segment is not a writable data segment or segment
                 * selector's RPL != CPL or segment descriptor's DPL != CPL
                 */
                if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
                        goto exception;
                break;
        case VCPU_SREG_CS:
                if (!(seg_desc.type & 8))
                        goto exception;

                if (seg_desc.type & 4) {
                        /* conforming */
                        if (dpl > cpl)
                                goto exception;
                } else {
                        /* nonconforming */
                        if (rpl > cpl || dpl != cpl)
                                goto exception;
                }
                /* in long-mode d/b must be clear if l is set */
                if (seg_desc.d && seg_desc.l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                goto exception;
                }

                /* CS(RPL) <- CPL */
                selector = (selector & 0xfffc) | cpl;
                break;
        case VCPU_SREG_TR:
                if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
                        goto exception;
                old_desc = seg_desc;
                seg_desc.type |= 2; /* busy */
                ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
                                                  sizeof(seg_desc), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                break;
        case VCPU_SREG_LDTR:
                if (seg_desc.s || seg_desc.type != 2)
                        goto exception;
                break;
        default: /* DS, ES, FS, or GS */
                /*
                 * segment is not a data or readable code segment or
                 * ((segment is a data or nonconforming code segment)
                 * and (both RPL and CPL > DPL))
                 */
                if ((seg_desc.type & 0xa) == 0x8 ||
                    (((seg_desc.type & 0xc) != 0xc) &&
                     (rpl > dpl && cpl > dpl)))
                        goto exception;
                break;
        }

        if (seg_desc.s) {
                /* mark segment as accessed */
                if (!(seg_desc.type & 1)) {
                        seg_desc.type |= 1;
                        ret = write_segment_descriptor(ctxt, selector,
                                                       &seg_desc);
                        if (ret != X86EMUL_CONTINUE)
                                return ret;
                }
        } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
                ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
                                sizeof(base3), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                if (is_noncanonical_address(get_desc_base(&seg_desc) |
                                            ((u64)base3 << 32)))
                        return emulate_gp(ctxt, 0);
        }
load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
        if (desc)
                *desc = seg_desc;
        return X86EMUL_CONTINUE;
exception:
        return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, int seg)
{
        u8 cpl = ctxt->ops->cpl(ctxt);

        /*
         * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
         * they can load it at CPL<3 (Intel's manual says only LSS can,
         * but it's wrong).
         *
         * However, the Intel manual says that putting IST=1/DPL=3 in
         * an interrupt gate will result in SS=3 (the AMD manual instead
         * says it doesn't), so allow SS=3 in __load_segment_descriptor
         * and only forbid it here.
         */
        if (seg == VCPU_SREG_SS && selector == 3 &&
            ctxt->mode == X86EMUL_MODE_PROT64)
                return emulate_exception(ctxt, GP_VECTOR, 0, true);

        return __load_segment_descriptor(ctxt, selector, seg, cpl,
                                         X86_TRANSFER_NONE, NULL);
}
static void write_register_operand(struct operand *op)
{
        return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
        switch (op->type) {
        case OP_REG:
                write_register_operand(op);
                break;
        case OP_MEM:
                if (ctxt->lock_prefix)
                        return segmented_cmpxchg(ctxt, op->addr.mem,
                                                 &op->orig_val, &op->val,
                                                 op->bytes);
                else
                        return segmented_write(ctxt, op->addr.mem,
                                               &op->val, op->bytes);
        case OP_MEM_STR:
                return segmented_write(ctxt, op->addr.mem, op->data,
                                       op->bytes * op->count);
        case OP_XMM:
                write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
                break;
        case OP_MM:
                write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
                break;
        case OP_NONE:
                /* no writeback */
                break;
        default:
                break;
        }
        return X86EMUL_CONTINUE;
}

static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
        struct segmented_address addr;

        rsp_increment(ctxt, -bytes);
        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;

        return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
        return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
                       void *dest, int len)
{
        int rc;
        struct segmented_address addr;

        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;
        rc = segmented_read(ctxt, addr, dest, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rsp_increment(ctxt, len);
        return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
        return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        void *dest, int len)
{
        int rc;
        unsigned long val, change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);

        rc = emulate_pop(ctxt, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
                      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
                      X86_EFLAGS_AC | X86_EFLAGS_ID;

        switch (ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
                        change_mask |= X86_EFLAGS_IOPL;
                if (cpl <= iopl)
                        change_mask |= X86_EFLAGS_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3)
                        return emulate_gp(ctxt, 0);
                change_mask |= X86_EFLAGS_IF;
                break;
        default: /* real mode */
                change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
                break;
        }

        *(unsigned long *)dest =
                (ctxt->eflags & ~change_mask) | (val & change_mask);

        return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
        ctxt->dst.type = OP_REG;
        ctxt->dst.addr.reg = &ctxt->eflags;
        ctxt->dst.bytes = ctxt->op_bytes;
        return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned frame_size = ctxt->src.val;
        unsigned nesting_level = ctxt->src2.val & 31;
        ulong rbp;

        if (nesting_level)
                return X86EMUL_UNHANDLEABLE;

        rbp = reg_read(ctxt, VCPU_REGS_RBP);
        rc = push(ctxt, &rbp, stack_size(ctxt));
        if (rc != X86EMUL_CONTINUE)
                return rc;
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
                      stack_mask(ctxt));
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
                      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
                      stack_mask(ctxt));
        return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
                      stack_mask(ctxt));
        return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;

        ctxt->src.val = get_segment_selector(ctxt, seg);
        if (ctxt->op_bytes == 4) {
                rsp_increment(ctxt, -2);
                ctxt->op_bytes = 2;
        }

        return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned long selector;
        int rc;

        rc = emulate_pop(ctxt, &selector, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (ctxt->modrm_reg == VCPU_SREG_SS)
                ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
        if (ctxt->op_bytes > 2)
                rsp_increment(ctxt, ctxt->op_bytes - 2);

        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
        return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
        unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RAX;

        while (reg <= VCPU_REGS_RDI) {
                (reg == VCPU_REGS_RSP) ?
                (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

                rc = em_push(ctxt);
                if (rc != X86EMUL_CONTINUE)
                        return rc;

                ++reg;
        }

        return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
        ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
        return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
        u32 val;

        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
                        rsp_increment(ctxt, ctxt->op_bytes);
                        --reg;
                        continue;
                }

                rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
                assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
                --reg;
        }
        return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        int rc;
        struct desc_ptr dt;
        gva_t cs_addr;
        gva_t eip_addr;
        u16 cs, eip;

        /* TODO: Add limit checks */
        ctxt->src.val = ctxt->eflags;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->src.val = ctxt->_eip;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ops->get_idt(ctxt, &dt);

        eip_addr = dt.address + (irq << 2);
        cs_addr = dt.address + (irq << 2) + 2;

        rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = eip;

        return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        int rc;

        invalidate_registers(ctxt);
        rc = __emulate_int_real(ctxt, irq);
        if (rc == X86EMUL_CONTINUE)
                writeback_registers(ctxt);
        return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
        switch (ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return __emulate_int_real(ctxt, irq);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* Protected mode interrupts unimplemented yet */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        unsigned long temp_eip = 0;
        unsigned long temp_eflags = 0;
        unsigned long cs = 0;
        unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                             X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
                             X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
                             X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
                             X86_EFLAGS_AC | X86_EFLAGS_ID |
                             X86_EFLAGS_FIXED;
        unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
                                  X86_EFLAGS_VIP;

        /* TODO: Add stack limit check */

        rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (temp_eip & ~0xffff)
                return emulate_gp(ctxt, 0);

        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = temp_eip;

        if (ctxt->op_bytes == 4)
                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
        else if (ctxt->op_bytes == 2) {
                ctxt->eflags &= ~0xffff;
                ctxt->eflags |= temp_eflags;
        }

        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
        ctxt->eflags |= X86_EFLAGS_FIXED;
        ctxt->ops->set_nmi_mask(ctxt, false);

        return rc;
}

static int em_iret(struct x86_emulate_ctxt *ctxt)
{
        switch (ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return emulate_iret_real(ctxt);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* iret from protected mode unimplemented yet */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned short sel;
        struct desc_struct new_desc;
        u8 cpl = ctxt->ops->cpl(ctxt);

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_CALL_JMP,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}

static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
        return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        long int old_eip;

        old_eip = ctxt->_eip;
        rc = assign_eip_near(ctxt, ctxt->src.val);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        ctxt->src.val = old_eip;
        rc = em_push(ctxt);
        return rc;
}

static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
        u64 old = ctxt->dst.orig_val64;

        if (ctxt->dst.bytes == 16)
                return X86EMUL_UNHANDLEABLE;

        if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
                ctxt->eflags &= ~X86_EFLAGS_ZF;
        } else {
                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
                        (u32) reg_read(ctxt, VCPU_REGS_RBX);

                ctxt->eflags |= X86_EFLAGS_ZF;
        }
        return X86EMUL_CONTINUE;
}
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip, cs;
        int cpl = ctxt->ops->cpl(ctxt);
        struct desc_struct new_desc;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        /* Outer-privilege level return is not implemented */
        if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                return X86EMUL_UNHANDLEABLE;
        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_RET,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = assign_eip_far(ctxt, eip, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
        int rc;

        rc = em_ret_far(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rsp_increment(ctxt, ctxt->src.val);
        return X86EMUL_CONTINUE;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
        /* Save real source value, then compare EAX against destination. */
        ctxt->dst.orig_val = ctxt->dst.val;
        ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
        ctxt->src.orig_val = ctxt->src.val;
        ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);

        if (ctxt->eflags & X86_EFLAGS_ZF) {
                /* Success: write back to memory; no update of EAX */
                ctxt->src.type = OP_NONE;
                ctxt->dst.val = ctxt->src.orig_val;
        } else {
                /* Failure: write the value we saw to EAX. */
                ctxt->src.type = OP_REG;
                ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
                ctxt->src.val = ctxt->dst.orig_val;
                /* Create write-cycle to dest by writing the same value back. */
                ctxt->dst.val = ctxt->dst.orig_val;
        }
        return X86EMUL_CONTINUE;
}

static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned short sel;
        int rc;

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = load_segment_descriptor(ctxt, sel, seg);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->dst.val = ctxt->src.val;
        return rc;
}

static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
        u32 eax, ebx, ecx, edx;

        eax = 0x80000001;
        ecx = 0;
        ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
        return edx & bit(X86_FEATURE_LM);
}
#define GET_SMSTATE(type, smbase, offset) \
({ \
        type __val; \
        int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
                                     sizeof(__val)); \
        if (r != X86EMUL_CONTINUE) \
                return X86EMUL_UNHANDLEABLE; \
        __val; \
})
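/*
 * Usage sketch: GET_SMSTATE reads one field of the SMM state-save area,
 * e.g. the saved 32-bit CR0 at offset 0x7ffc:
 *
 *	cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
 *
 * Note the statement expression returns X86EMUL_UNHANDLEABLE from the
 * *enclosing* function on a failed read, so it is only safe in helpers
 * that themselves return an emulator status code.
 */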
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->d    = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->p    = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >>  8) & 15;
}

static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;

        selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);

        if (n < 3)
                offset = 0x7f84 + n * 12;
        else
                offset = 0x7f2c + (n - 3) * 12;

        set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
        set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
        ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
        return X86EMUL_CONTINUE;
}

static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;
        u32 base3;

        offset = 0x7e00 + n * 16;

        selector =                GET_SMSTATE(u16, smbase, offset);
        rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
        set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
        set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
        base3 =                   GET_SMSTATE(u32, smbase, offset + 12);

        ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
        return X86EMUL_CONTINUE;
}

static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                                    u64 cr0, u64 cr4)
{
        int bad;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode.  However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = ctxt->ops->set_cr(ctxt, 0, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
        }

        return X86EMUL_CONTINUE;
}
2412 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2414 struct desc_struct desc;
2420 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2421 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
2422 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2423 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2425 for (i = 0; i < 8; i++)
2426 *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
2428 val = GET_SMSTATE(u32, smbase, 0x7fcc);
2429 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2430 val = GET_SMSTATE(u32, smbase, 0x7fc8);
2431 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2433 selector = GET_SMSTATE(u32, smbase, 0x7fc4);
2434 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
2435 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
2436 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
2437 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2439 selector = GET_SMSTATE(u32, smbase, 0x7fc0);
2440 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
2441 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
2442 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
2443 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2445 dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
2446 dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
2447 ctxt->ops->set_gdt(ctxt, &dt);
2449 dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
2450 dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
2451 ctxt->ops->set_idt(ctxt, &dt);
2453	for (i = 0; i < 6; i++) {
2454		int r = rsm_load_seg_32(ctxt, smbase, i);
2455		if (r != X86EMUL_CONTINUE)
2456			return r;
2457	}
2459 cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
2461 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2463 return rsm_enter_protected_mode(ctxt, cr0, cr4);
2466 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2468 struct desc_struct desc;
2475 for (i = 0; i < 16; i++)
2476 *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
2478 ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
2479 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
2481 val = GET_SMSTATE(u32, smbase, 0x7f68);
2482 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2483 val = GET_SMSTATE(u32, smbase, 0x7f60);
2484 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2486 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2487 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
2488 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2489 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2490 val = GET_SMSTATE(u64, smbase, 0x7ed0);
2491 ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
2493 selector = GET_SMSTATE(u32, smbase, 0x7e90);
2494 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
2495 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
2496 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
2497 base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
2498 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2500 dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
2501 dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
2502 ctxt->ops->set_idt(ctxt, &dt);
2504 selector = GET_SMSTATE(u32, smbase, 0x7e70);
2505 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
2506 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
2507 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
2508 base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
2509 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2511 dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
2512 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2513 ctxt->ops->set_gdt(ctxt, &dt);
2515	r = rsm_enter_protected_mode(ctxt, cr0, cr4);
2516	if (r != X86EMUL_CONTINUE)
2517		return r;
2519	for (i = 0; i < 6; i++) {
2520		r = rsm_load_seg_64(ctxt, smbase, i);
2521		if (r != X86EMUL_CONTINUE)
2522			return r;
2523	}
2525 return X86EMUL_CONTINUE;
2528 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2530 unsigned long cr0, cr4, efer;
2534 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2535 return emulate_ud(ctxt);
2538 * Get back to real mode, to prepare a safe state in which to load
2539 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2540 * supports long mode.
2542 cr4 = ctxt->ops->get_cr(ctxt, 4);
2543 if (emulator_has_longmode(ctxt)) {
2544 struct desc_struct cs_desc;
2546 /* Zero CR4.PCIDE before CR0.PG. */
2547 if (cr4 & X86_CR4_PCIDE) {
2548 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2549 cr4 &= ~X86_CR4_PCIDE;
2552 /* A 32-bit code segment is required to clear EFER.LMA. */
2553		memset(&cs_desc, 0, sizeof(cs_desc));
2554		cs_desc.type = 0xb;	/* execute/read code segment */
2555		cs_desc.s = cs_desc.g = cs_desc.p = 1;
2556 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2559 /* For the 64-bit case, this will clear EFER.LMA. */
2560 cr0 = ctxt->ops->get_cr(ctxt, 0);
2561 if (cr0 & X86_CR0_PE)
2562 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2564 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2565 if (cr4 & X86_CR4_PAE)
2566 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2568	/* And finally go back to 32-bit mode. */
2569	efer = 0;
2570	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2572 smbase = ctxt->ops->get_smbase(ctxt);
2573 if (emulator_has_longmode(ctxt))
2574 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2576 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2578 if (ret != X86EMUL_CONTINUE) {
2579 /* FIXME: should triple fault */
2580 return X86EMUL_UNHANDLEABLE;
2583 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2584 ctxt->ops->set_nmi_mask(ctxt, false);
2586 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2587 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2588 return X86EMUL_CONTINUE;
2592 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2593 struct desc_struct *cs, struct desc_struct *ss)
2595 cs->l = 0; /* will be adjusted later */
2596 set_desc_base(cs, 0); /* flat segment */
2597 cs->g = 1; /* 4kb granularity */
2598 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2599 cs->type = 0x0b; /* Read, Execute, Accessed */
2601 cs->dpl = 0; /* will be adjusted later */
2606 set_desc_base(ss, 0); /* flat segment */
2607 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2608 ss->g = 1; /* 4kb granularity */
2610 ss->type = 0x03; /* Read/Write, Accessed */
2611 ss->d = 1; /* 32bit stack segment */
2618 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2620 u32 eax, ebx, ecx, edx;
2623 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2624 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2625 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2626 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2629 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2631 const struct x86_emulate_ops *ops = ctxt->ops;
2632 u32 eax, ebx, ecx, edx;
2635	 * syscall should always be enabled in long mode, so we only apply
2636	 * vendor-specific (cpuid) checks when other modes are active...
2638	if (ctxt->mode == X86EMUL_MODE_PROT64)
2639		return true;
2641	eax = 0x00000000;
2642	ecx = 0x00000000;
2643	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2645	 * Intel ("GenuineIntel")
2646	 * remark: Intel CPUs only support "syscall" in 64-bit long
2647	 * mode. A 64-bit guest running a 32-bit compat-app will
2648	 * therefore #UD! While this behaviour could be fixed up by
2649	 * emulating the AMD response, real AMD CPUs cannot behave
2650	 * like Intel ones, so we keep the vendor-specific behaviour.
2652 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2653 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2654	    edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2655		return false;
2657 /* AMD ("AuthenticAMD") */
2658 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2659 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2660	    edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2661		return true;
2663 /* AMD ("AMDisbetter!") */
2664 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2665 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2666	    edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2667		return true;
2669	/* default: (not Intel, not AMD), apply Intel's stricter rules... */
2670	return false;
2673 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2675 const struct x86_emulate_ops *ops = ctxt->ops;
2676 struct desc_struct cs, ss;
2681 /* syscall is not available in real mode */
2682 if (ctxt->mode == X86EMUL_MODE_REAL ||
2683 ctxt->mode == X86EMUL_MODE_VM86)
2684 return emulate_ud(ctxt);
2686 if (!(em_syscall_is_enabled(ctxt)))
2687 return emulate_ud(ctxt);
2689 ops->get_msr(ctxt, MSR_EFER, &efer);
2690 setup_syscalls_segments(ctxt, &cs, &ss);
2692 if (!(efer & EFER_SCE))
2693 return emulate_ud(ctxt);
2695	ops->get_msr(ctxt, MSR_STAR, &msr_data);
2696	msr_data >>= 32;
2697	cs_sel = (u16)(msr_data & 0xfffc);
2698	ss_sel = (u16)(msr_data + 8);
2700	if (efer & EFER_LMA) {
2701		cs.d = 0;
2702		cs.l = 1;
2703	}
2704 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2705 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2707 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2708 if (efer & EFER_LMA) {
2709 #ifdef CONFIG_X86_64
2710 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2712		ops->get_msr(ctxt,
2713			     ctxt->mode == X86EMUL_MODE_PROT64 ?
2714			     MSR_LSTAR : MSR_CSTAR, &msr_data);
2715		ctxt->_eip = msr_data;
2717 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2718 ctxt->eflags &= ~msr_data;
2719		ctxt->eflags |= X86_EFLAGS_FIXED;
2720	#endif
2721	} else {
2722		/* legacy mode */
2723		ops->get_msr(ctxt, MSR_STAR, &msr_data);
2724 ctxt->_eip = (u32)msr_data;
2726 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2729 return X86EMUL_CONTINUE;
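/*
 * Illustrative sketch (not part of the original file): how em_syscall()
 * derives its target selectors from MSR_STAR. The helper is hypothetical
 * and only documents the layout: bits 47:32 of STAR hold the kernel CS
 * selector, and SS is architecturally CS + 8.
 */
static inline void star_to_syscall_selectors(u64 star, u16 *cs_sel,
					     u16 *ss_sel)
{
	u64 sel = star >> 32;		/* SYSCALL CS/SS selector field */

	*cs_sel = (u16)(sel & 0xfffc);	/* RPL forced to 0 */
	*ss_sel = (u16)(sel + 8);	/* SS is always CS + 8 */
}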
2732 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2734 const struct x86_emulate_ops *ops = ctxt->ops;
2735 struct desc_struct cs, ss;
2740 ops->get_msr(ctxt, MSR_EFER, &efer);
2741 /* inject #GP if in real mode */
2742 if (ctxt->mode == X86EMUL_MODE_REAL)
2743 return emulate_gp(ctxt, 0);
2746	 * Not recognized on AMD in compat mode (but is recognized in legacy
2747	 * mode, of course).
2749 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2750 && !vendor_intel(ctxt))
2751 return emulate_ud(ctxt);
2753 /* sysenter/sysexit have not been tested in 64bit mode. */
2754 if (ctxt->mode == X86EMUL_MODE_PROT64)
2755 return X86EMUL_UNHANDLEABLE;
2757 setup_syscalls_segments(ctxt, &cs, &ss);
2759 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2760 if ((msr_data & 0xfffc) == 0x0)
2761 return emulate_gp(ctxt, 0);
2763 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2764 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2765 ss_sel = cs_sel + 8;
2766	if (efer & EFER_LMA) {
2767		cs.d = 0;
2768		cs.l = 1;
2769	}
2771 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2772 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2774 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2775 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2777 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2778	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2779							      (u32)msr_data;
2781 return X86EMUL_CONTINUE;
2784 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2786 const struct x86_emulate_ops *ops = ctxt->ops;
2787 struct desc_struct cs, ss;
2788 u64 msr_data, rcx, rdx;
2790 u16 cs_sel = 0, ss_sel = 0;
2792 /* inject #GP if in real mode or Virtual 8086 mode */
2793 if (ctxt->mode == X86EMUL_MODE_REAL ||
2794 ctxt->mode == X86EMUL_MODE_VM86)
2795 return emulate_gp(ctxt, 0);
2797 setup_syscalls_segments(ctxt, &cs, &ss);
2799 if ((ctxt->rex_prefix & 0x8) != 0x0)
2800 usermode = X86EMUL_MODE_PROT64;
2802 usermode = X86EMUL_MODE_PROT32;
2804 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2805 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2809	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2810	switch (usermode) {
2811	case X86EMUL_MODE_PROT32:
2812 cs_sel = (u16)(msr_data + 16);
2813 if ((msr_data & 0xfffc) == 0x0)
2814 return emulate_gp(ctxt, 0);
2815		ss_sel = (u16)(msr_data + 24);
2816		break;
2819 case X86EMUL_MODE_PROT64:
2820 cs_sel = (u16)(msr_data + 32);
2821 if (msr_data == 0x0)
2822 return emulate_gp(ctxt, 0);
2823		ss_sel = cs_sel + 8;
2824		cs.d = 0;
2825		cs.l = 1;
2826		if (is_noncanonical_address(rcx) ||
2827		    is_noncanonical_address(rdx))
2828			return emulate_gp(ctxt, 0);
2829		break;
2830	}
2831 cs_sel |= SEGMENT_RPL_MASK;
2832 ss_sel |= SEGMENT_RPL_MASK;
2834 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2835 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2838 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2840 return X86EMUL_CONTINUE;
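/*
 * Worked example (illustrative, made-up value): with
 * MSR_IA32_SYSENTER_CS = 0x0010, the paths above compute CS = 0x0020
 * and SS = 0x0028 for a 32-bit exit, or CS = 0x0030 and SS = 0x0038
 * for a 64-bit exit, before both selectors get their RPL forced to 3
 * via SEGMENT_RPL_MASK.
 */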
2843 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2846	if (ctxt->mode == X86EMUL_MODE_REAL)
2847		return false;
2848	if (ctxt->mode == X86EMUL_MODE_VM86)
2849		return true;
2850 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2851 return ctxt->ops->cpl(ctxt) > iopl;
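/*
 * Illustrative note: with EFLAGS.IOPL = 0 and CPL = 3 (the usual
 * protected-mode user configuration) emulator_bad_iopl() returns true,
 * so IN/OUT must additionally be allowed by the TSS I/O permission
 * bitmap checked below.
 */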
2854 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2857 const struct x86_emulate_ops *ops = ctxt->ops;
2858 struct desc_struct tr_seg;
2861 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2862 unsigned mask = (1 << len) - 1;
2865	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2866	if (!tr_seg.p)
2867		return false;
2868	if (desc_limit_scaled(&tr_seg) < 103)
2869		return false;
2870 base = get_desc_base(&tr_seg);
2871 #ifdef CONFIG_X86_64
2872 base |= ((u64)base3) << 32;
2874 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2875 if (r != X86EMUL_CONTINUE)
2877	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2878		return false;
2879	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2880	if (r != X86EMUL_CONTINUE)
2881		return false;
2882	if ((perm >> bit_idx) & mask)
2883		return false;
2884	return true;
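/*
 * Worked example (illustrative): for port 0x3fa and len = 2, the check
 * above reads the 16-bit bitmap window at io_bitmap_ptr + 0x7f
 * (port/8), computes bit_idx = 2 (port & 0x7) and mask = 0x3, and
 * permits the access only if bits 2 and 3 of that window are clear.
 */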
2887 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2893 if (emulator_bad_iopl(ctxt))
2894		if (!emulator_io_port_access_allowed(ctxt, port, len))
2895			return false;
2897	ctxt->perm_ok = true;
2898	return true;
2902 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2905	 * Intel CPUs mask the counter and pointers in a rather strange
2906	 * manner when ECX is zero due to REP-string optimizations.
2908 #ifdef CONFIG_X86_64
2909	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2910		return;
2912	*reg_write(ctxt, VCPU_REGS_RCX) = 0;
2914	switch (ctxt->b) {
2915 case 0xa4: /* movsb */
2916 case 0xa5: /* movsd/w */
2917		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2918		/* fall through */
2919 case 0xaa: /* stosb */
2920 case 0xab: /* stosd/w */
2921 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2926 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2927 struct tss_segment_16 *tss)
2929 tss->ip = ctxt->_eip;
2930 tss->flag = ctxt->eflags;
2931 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2932 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2933 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2934 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2935 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2936 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2937 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2938 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2940 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2941 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2942 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2943 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2944 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2947 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2948 struct tss_segment_16 *tss)
2953 ctxt->_eip = tss->ip;
2954 ctxt->eflags = tss->flag | 2;
2955 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2956 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2957 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2958 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2959 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2960 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2961 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2962 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2965	 * SDM says that segment selectors are loaded before segment
2966	 * descriptors.
2968 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2969 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2970 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2971 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2972 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2977	 * Now load segment descriptors. If a fault happens at this stage
2978	 * it is handled in the context of the new task.
2980 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2981 X86_TRANSFER_TASK_SWITCH, NULL);
2982 if (ret != X86EMUL_CONTINUE)
2984 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2985 X86_TRANSFER_TASK_SWITCH, NULL);
2986 if (ret != X86EMUL_CONTINUE)
2988 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2989 X86_TRANSFER_TASK_SWITCH, NULL);
2990 if (ret != X86EMUL_CONTINUE)
2992 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2993 X86_TRANSFER_TASK_SWITCH, NULL);
2994 if (ret != X86EMUL_CONTINUE)
2996 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2997 X86_TRANSFER_TASK_SWITCH, NULL);
2998 if (ret != X86EMUL_CONTINUE)
3001 return X86EMUL_CONTINUE;
3004 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
3005 u16 tss_selector, u16 old_tss_sel,
3006 ulong old_tss_base, struct desc_struct *new_desc)
3008 const struct x86_emulate_ops *ops = ctxt->ops;
3009 struct tss_segment_16 tss_seg;
3011 u32 new_tss_base = get_desc_base(new_desc);
3013 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3015 if (ret != X86EMUL_CONTINUE)
3018 save_state_to_tss16(ctxt, &tss_seg);
3020 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3022 if (ret != X86EMUL_CONTINUE)
3025 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3027 if (ret != X86EMUL_CONTINUE)
3030 if (old_tss_sel != 0xffff) {
3031 tss_seg.prev_task_link = old_tss_sel;
3033 ret = ops->write_std(ctxt, new_tss_base,
3034 &tss_seg.prev_task_link,
3035 sizeof tss_seg.prev_task_link,
3037 if (ret != X86EMUL_CONTINUE)
3041 return load_state_from_tss16(ctxt, &tss_seg);
3044 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3045 struct tss_segment_32 *tss)
3047	/* CR3 and the LDT selector are intentionally not saved */
3048 tss->eip = ctxt->_eip;
3049 tss->eflags = ctxt->eflags;
3050 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3051 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3052 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3053 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3054 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3055 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3056 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3057 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3059 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3060 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3061 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3062 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3063 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3064 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3067 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3068 struct tss_segment_32 *tss)
3073 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3074 return emulate_gp(ctxt, 0);
3075 ctxt->_eip = tss->eip;
3076 ctxt->eflags = tss->eflags | 2;
3078 /* General purpose registers */
3079 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3080 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3081 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3082 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3083 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3084 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3085 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3086 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3089	 * SDM says that segment selectors are loaded before segment
3090	 * descriptors. This is important because CPL checks will
3091	 * use CS.RPL.
3093 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3094 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3095 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3096 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3097 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3098 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3099 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3102 * If we're switching between Protected Mode and VM86, we need to make
3103 * sure to update the mode before loading the segment descriptors so
3104 * that the selectors are interpreted correctly.
3106 if (ctxt->eflags & X86_EFLAGS_VM) {
3107 ctxt->mode = X86EMUL_MODE_VM86;
3110 ctxt->mode = X86EMUL_MODE_PROT32;
3115	 * Now load segment descriptors. If a fault happens at this stage
3116	 * it is handled in the context of the new task.
3118 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3119 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3120 if (ret != X86EMUL_CONTINUE)
3122 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3123 X86_TRANSFER_TASK_SWITCH, NULL);
3124 if (ret != X86EMUL_CONTINUE)
3126 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3127 X86_TRANSFER_TASK_SWITCH, NULL);
3128 if (ret != X86EMUL_CONTINUE)
3130 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3131 X86_TRANSFER_TASK_SWITCH, NULL);
3132 if (ret != X86EMUL_CONTINUE)
3134 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3135 X86_TRANSFER_TASK_SWITCH, NULL);
3136 if (ret != X86EMUL_CONTINUE)
3138 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3139 X86_TRANSFER_TASK_SWITCH, NULL);
3140 if (ret != X86EMUL_CONTINUE)
3142 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3143					X86_TRANSFER_TASK_SWITCH, NULL);
3145	return ret;
3148 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3149 u16 tss_selector, u16 old_tss_sel,
3150 ulong old_tss_base, struct desc_struct *new_desc)
3152 const struct x86_emulate_ops *ops = ctxt->ops;
3153 struct tss_segment_32 tss_seg;
3155 u32 new_tss_base = get_desc_base(new_desc);
3156 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3157 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3159 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3161 if (ret != X86EMUL_CONTINUE)
3164 save_state_to_tss32(ctxt, &tss_seg);
3166 /* Only GP registers and segment selectors are saved */
3167 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3168 ldt_sel_offset - eip_offset, &ctxt->exception);
3169 if (ret != X86EMUL_CONTINUE)
3172 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3174 if (ret != X86EMUL_CONTINUE)
3177 if (old_tss_sel != 0xffff) {
3178 tss_seg.prev_task_link = old_tss_sel;
3180 ret = ops->write_std(ctxt, new_tss_base,
3181 &tss_seg.prev_task_link,
3182 sizeof tss_seg.prev_task_link,
3184 if (ret != X86EMUL_CONTINUE)
3188 return load_state_from_tss32(ctxt, &tss_seg);
3191 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3192 u16 tss_selector, int idt_index, int reason,
3193 bool has_error_code, u32 error_code)
3195 const struct x86_emulate_ops *ops = ctxt->ops;
3196 struct desc_struct curr_tss_desc, next_tss_desc;
3198 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3199 ulong old_tss_base =
3200 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3202 ulong desc_addr, dr7;
3204 /* FIXME: old_tss_base == ~0 ? */
3206 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3207 if (ret != X86EMUL_CONTINUE)
3209 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3210 if (ret != X86EMUL_CONTINUE)
3213 /* FIXME: check that next_tss_desc is tss */
3216 * Check privileges. The three cases are task switch caused by...
3218 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3219 * 2. Exception/IRQ/iret: No check is performed
3220 * 3. jmp/call to TSS/task-gate: No check is performed since the
3221 * hardware checks it before exiting.
3223 if (reason == TASK_SWITCH_GATE) {
3224 if (idt_index != -1) {
3225 /* Software interrupts */
3226 struct desc_struct task_gate_desc;
3229 ret = read_interrupt_descriptor(ctxt, idt_index,
3231 if (ret != X86EMUL_CONTINUE)
3234 dpl = task_gate_desc.dpl;
3235 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3236 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3240 desc_limit = desc_limit_scaled(&next_tss_desc);
3241 if (!next_tss_desc.p ||
3242 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3243 desc_limit < 0x2b)) {
3244 return emulate_ts(ctxt, tss_selector & 0xfffc);
3247 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3248 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3249 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3252 if (reason == TASK_SWITCH_IRET)
3253 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3255	/* Set the back link to the previous task only if the NT bit is set
3256	   in eflags; note that old_tss_sel is not used after this point. */
3257 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3258 old_tss_sel = 0xffff;
3260 if (next_tss_desc.type & 8)
3261 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3262 old_tss_base, &next_tss_desc);
3264 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3265 old_tss_base, &next_tss_desc);
3266 if (ret != X86EMUL_CONTINUE)
3269 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3270 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3272 if (reason != TASK_SWITCH_IRET) {
3273 next_tss_desc.type |= (1 << 1); /* set busy flag */
3274 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3277 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3278 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3280 if (has_error_code) {
3281 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3282 ctxt->lock_prefix = 0;
3283 ctxt->src.val = (unsigned long) error_code;
3284		ret = em_push(ctxt);
3285	}
3287	ops->get_dr(ctxt, 7, &dr7);
3288	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3290	return ret;
3293 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3294 u16 tss_selector, int idt_index, int reason,
3295 bool has_error_code, u32 error_code)
3299 invalidate_registers(ctxt);
3300 ctxt->_eip = ctxt->eip;
3301 ctxt->dst.type = OP_NONE;
3303 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3304 has_error_code, error_code);
3306 if (rc == X86EMUL_CONTINUE) {
3307 ctxt->eip = ctxt->_eip;
3308 writeback_registers(ctxt);
3311 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3314 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3317 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3319 register_address_increment(ctxt, reg, df * op->bytes);
3320 op->addr.mem.ea = register_address(ctxt, reg);
3323 static int em_das(struct x86_emulate_ctxt *ctxt)
3326 bool af, cf, old_cf;
3328 cf = ctxt->eflags & X86_EFLAGS_CF;
3334 af = ctxt->eflags & X86_EFLAGS_AF;
3335 if ((al & 0x0f) > 9 || af) {
3337 cf = old_cf | (al >= 250);
3342 if (old_al > 0x99 || old_cf) {
3348 /* Set PF, ZF, SF */
3349	ctxt->src.type = OP_IMM;
3350	ctxt->src.val = 0;
3351	ctxt->src.bytes = 1;
3352 fastop(ctxt, em_or);
3353	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3354	if (cf)
3355		ctxt->eflags |= X86_EFLAGS_CF;
3356	if (af)
3357		ctxt->eflags |= X86_EFLAGS_AF;
3358 return X86EMUL_CONTINUE;
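/*
 * Worked example (illustrative): after the subtraction 0x23 - 0x18 the
 * accumulator holds 0x0b; DAS sees (al & 0x0f) > 9, subtracts 6 and
 * leaves al = 0x05, the correct packed-BCD result of 23 - 18.
 */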
3361 static int em_aam(struct x86_emulate_ctxt *ctxt)
3365 if (ctxt->src.val == 0)
3366 return emulate_de(ctxt);
3368 al = ctxt->dst.val & 0xff;
3369 ah = al / ctxt->src.val;
3370 al %= ctxt->src.val;
3372 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3374 /* Set PF, ZF, SF */
3375	ctxt->src.type = OP_IMM;
3376	ctxt->src.val = 0;
3377	ctxt->src.bytes = 1;
3378 fastop(ctxt, em_or);
3380 return X86EMUL_CONTINUE;
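/*
 * Worked example (illustrative): AAM with its default base of 10 and
 * AL = 0x35 (53 decimal) produces AH = 5 and AL = 3, i.e. AX = 0x0503.
 */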
3383 static int em_aad(struct x86_emulate_ctxt *ctxt)
3385 u8 al = ctxt->dst.val & 0xff;
3386 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3388 al = (al + (ah * ctxt->src.val)) & 0xff;
3390 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3392 /* Set PF, ZF, SF */
3393	ctxt->src.type = OP_IMM;
3394	ctxt->src.val = 0;
3395	ctxt->src.bytes = 1;
3396 fastop(ctxt, em_or);
3398 return X86EMUL_CONTINUE;
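/*
 * Worked example (illustrative): AAD with base 10 and AX = 0x0503
 * computes al = 5 * 10 + 3 = 53 = 0x35 and clears AH, giving
 * AX = 0x0035 (the exact inverse of the AAM example above).
 */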
3401 static int em_call(struct x86_emulate_ctxt *ctxt)
3404 long rel = ctxt->src.val;
3406 ctxt->src.val = (unsigned long)ctxt->_eip;
3407 rc = jmp_rel(ctxt, rel);
3408 if (rc != X86EMUL_CONTINUE)
3410 return em_push(ctxt);
3413 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3418 struct desc_struct old_desc, new_desc;
3419 const struct x86_emulate_ops *ops = ctxt->ops;
3420 int cpl = ctxt->ops->cpl(ctxt);
3421 enum x86emul_mode prev_mode = ctxt->mode;
3423 old_eip = ctxt->_eip;
3424 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3426 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3427 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3428 X86_TRANSFER_CALL_JMP, &new_desc);
3429 if (rc != X86EMUL_CONTINUE)
3432 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3433 if (rc != X86EMUL_CONTINUE)
3436	ctxt->src.val = old_cs;
3437	rc = em_push(ctxt);
3438	if (rc != X86EMUL_CONTINUE)
3439		goto fail;
3441	ctxt->src.val = old_eip;
3442	rc = em_push(ctxt);
3443	/* If we failed, we tainted the memory, but the very least we should
3444	   restore is the old CS descriptor and mode. */
3445	if (rc != X86EMUL_CONTINUE) {
3446		pr_warn_once("faulting far call emulation tainted memory\n");
3447		goto fail;
3448	}
3449	return rc;
3450 fail:
3451	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3452	ctxt->mode = prev_mode;
3453	return rc;
3457 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3462 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3463 if (rc != X86EMUL_CONTINUE)
3465 rc = assign_eip_near(ctxt, eip);
3466 if (rc != X86EMUL_CONTINUE)
3468 rsp_increment(ctxt, ctxt->src.val);
3469 return X86EMUL_CONTINUE;
3472 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3474 /* Write back the register source. */
3475 ctxt->src.val = ctxt->dst.val;
3476 write_register_operand(&ctxt->src);
3478 /* Write back the memory destination with implicit LOCK prefix. */
3479 ctxt->dst.val = ctxt->src.orig_val;
3480 ctxt->lock_prefix = 1;
3481 return X86EMUL_CONTINUE;
3484 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3486 ctxt->dst.val = ctxt->src2.val;
3487 return fastop(ctxt, em_imul);
3490 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3492 ctxt->dst.type = OP_REG;
3493 ctxt->dst.bytes = ctxt->src.bytes;
3494 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3495 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3497 return X86EMUL_CONTINUE;
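/*
 * Illustrative note: the expression above broadcasts the sign bit of
 * the source into the destination. For 16-bit CWD with AX = 0x8000,
 * src.val >> 15 is 1, so dst.val = ~(1 - 1) = ~0 and DX becomes 0xffff;
 * with AX = 0x7fff it is ~(0 - 1) = 0.
 */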
3500 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3504 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3505 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3506 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3507 return X86EMUL_CONTINUE;
3510 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3514 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3515 return emulate_gp(ctxt, 0);
3516 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3517 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3518 return X86EMUL_CONTINUE;
3521 static int em_mov(struct x86_emulate_ctxt *ctxt)
3523 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3524 return X86EMUL_CONTINUE;
3527 #define FFL(x) bit(X86_FEATURE_##x)
3529 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3531	u32 ebx, ecx, edx, eax = 1;
3532	u16 tmp;
3535 * Check MOVBE is set in the guest-visible CPUID leaf.
3537 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3538 if (!(ecx & FFL(MOVBE)))
3539 return emulate_ud(ctxt);
3541	switch (ctxt->op_bytes) {
3542	case 2:
3543		/*
3544		 * From MOVBE definition: "...When the operand size is 16 bits,
3545		 * the upper word of the destination register remains unchanged
3546		 * ..."
3547		 *
3548		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3549		 * rules, so we have to do the operation almost by hand.
3550		 */
3551		tmp = (u16)ctxt->src.val;
3552		ctxt->dst.val &= ~0xffffUL;
3553		ctxt->dst.val |= (unsigned long)swab16(tmp);
3554		break;
3555	case 4:
3556		ctxt->dst.val = swab32((u32)ctxt->src.val);
3557		break;
3558	case 8:
3559		ctxt->dst.val = swab64(ctxt->src.val);
3560		break;
3561	}
3564	return X86EMUL_CONTINUE;
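/*
 * Worked example (illustrative): a 16-bit MOVBE with src.val = 0x1234
 * stores swab16(0x1234) = 0x3412 in the low word of the destination
 * and, per the SDM, leaves the upper bytes of the register unchanged.
 */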
3567 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3569 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3570 return emulate_gp(ctxt, 0);
3572 /* Disable writeback. */
3573 ctxt->dst.type = OP_NONE;
3574 return X86EMUL_CONTINUE;
3577 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3581 if (ctxt->mode == X86EMUL_MODE_PROT64)
3582 val = ctxt->src.val & ~0ULL;
3584 val = ctxt->src.val & ~0U;
3586 /* #UD condition is already handled. */
3587 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3588 return emulate_gp(ctxt, 0);
3590 /* Disable writeback. */
3591 ctxt->dst.type = OP_NONE;
3592 return X86EMUL_CONTINUE;
3595 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3599 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3600 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3601 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3602 return emulate_gp(ctxt, 0);
3604 return X86EMUL_CONTINUE;
3607 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3611 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3612 return emulate_gp(ctxt, 0);
3614 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3615 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3616 return X86EMUL_CONTINUE;
3619 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3621 if (ctxt->modrm_reg > VCPU_SREG_GS)
3622 return emulate_ud(ctxt);
3624 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3625 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3626 ctxt->dst.bytes = 2;
3627 return X86EMUL_CONTINUE;
3630 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3632 u16 sel = ctxt->src.val;
3634 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3635 return emulate_ud(ctxt);
3637 if (ctxt->modrm_reg == VCPU_SREG_SS)
3638 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3640 /* Disable writeback. */
3641 ctxt->dst.type = OP_NONE;
3642 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3645 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3647 u16 sel = ctxt->src.val;
3649 /* Disable writeback. */
3650 ctxt->dst.type = OP_NONE;
3651 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3654 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3656 u16 sel = ctxt->src.val;
3658 /* Disable writeback. */
3659 ctxt->dst.type = OP_NONE;
3660 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3663 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3668 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3669 if (rc == X86EMUL_CONTINUE)
3670 ctxt->ops->invlpg(ctxt, linear);
3671 /* Disable writeback. */
3672 ctxt->dst.type = OP_NONE;
3673 return X86EMUL_CONTINUE;
3676 static int em_clts(struct x86_emulate_ctxt *ctxt)
3680	cr0 = ctxt->ops->get_cr(ctxt, 0);
3681	cr0 &= ~X86_CR0_TS;
3682	ctxt->ops->set_cr(ctxt, 0, cr0);
3683 return X86EMUL_CONTINUE;
3686 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3688 int rc = ctxt->ops->fix_hypercall(ctxt);
3690 if (rc != X86EMUL_CONTINUE)
3693 /* Let the processor re-execute the fixed hypercall */
3694 ctxt->_eip = ctxt->eip;
3695 /* Disable writeback. */
3696 ctxt->dst.type = OP_NONE;
3697 return X86EMUL_CONTINUE;
3700 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3701 void (*get)(struct x86_emulate_ctxt *ctxt,
3702 struct desc_ptr *ptr))
3704 struct desc_ptr desc_ptr;
3706	if (ctxt->mode == X86EMUL_MODE_PROT64)
3707		ctxt->op_bytes = 8;
3708	get(ctxt, &desc_ptr);
3709	if (ctxt->op_bytes == 2) {
3710		ctxt->op_bytes = 4;
3711		desc_ptr.address &= 0x00ffffff;
3712	}
3713 /* Disable writeback. */
3714 ctxt->dst.type = OP_NONE;
3715 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3716 &desc_ptr, 2 + ctxt->op_bytes);
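/*
 * Illustrative note: with a 16-bit operand size the masking above
 * truncates the stored base to 24 bits, e.g. a descriptor-table base of
 * 0x12345678 is written back as 0x00345678, mirroring the legacy
 * 24-bit base format of 16-bit SGDT/SIDT.
 */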
3719 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3721 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3724 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3726 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3729 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3731 struct desc_ptr desc_ptr;
3734	if (ctxt->mode == X86EMUL_MODE_PROT64)
3735		ctxt->op_bytes = 8;
3736 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3737 &desc_ptr.size, &desc_ptr.address,
3739 if (rc != X86EMUL_CONTINUE)
3741 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3742 is_noncanonical_address(desc_ptr.address))
3743 return emulate_gp(ctxt, 0);
3744	if (lgdt)
3745		ctxt->ops->set_gdt(ctxt, &desc_ptr);
3746	else
3747		ctxt->ops->set_idt(ctxt, &desc_ptr);
3748 /* Disable writeback. */
3749 ctxt->dst.type = OP_NONE;
3750 return X86EMUL_CONTINUE;
3753 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3755 return em_lgdt_lidt(ctxt, true);
3758 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3760 return em_lgdt_lidt(ctxt, false);
3763 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3765 if (ctxt->dst.type == OP_MEM)
3766 ctxt->dst.bytes = 2;
3767 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3768 return X86EMUL_CONTINUE;
3771 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3773 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3774 | (ctxt->src.val & 0x0f));
3775 ctxt->dst.type = OP_NONE;
3776 return X86EMUL_CONTINUE;
3779 static int em_loop(struct x86_emulate_ctxt *ctxt)
3781 int rc = X86EMUL_CONTINUE;
3783 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3784 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3785 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3786		rc = jmp_rel(ctxt, ctxt->src.val);
3788	return rc;
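/*
 * Illustrative note: opcodes 0xe0/0xe1/0xe2 are loopne/loope/loop.
 * XORing the opcode with 0x5 maps 0xe0 to condition code 0x5 (ZF
 * clear) and 0xe1 to 0x4 (ZF set), so test_cc() above applies the
 * right ZF test, while plain loop (0xe2) branches on the count alone.
 */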
3791 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3793 int rc = X86EMUL_CONTINUE;
3795 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3796		rc = jmp_rel(ctxt, ctxt->src.val);
3798	return rc;
3801 static int em_in(struct x86_emulate_ctxt *ctxt)
3803	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3804			     &ctxt->dst.val))
3805		return X86EMUL_IO_NEEDED;
3807 return X86EMUL_CONTINUE;
3810 static int em_out(struct x86_emulate_ctxt *ctxt)
3812	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3813				    &ctxt->src.val, 1);
3814 /* Disable writeback. */
3815 ctxt->dst.type = OP_NONE;
3816 return X86EMUL_CONTINUE;
3819 static int em_cli(struct x86_emulate_ctxt *ctxt)
3821 if (emulator_bad_iopl(ctxt))
3822 return emulate_gp(ctxt, 0);
3824 ctxt->eflags &= ~X86_EFLAGS_IF;
3825 return X86EMUL_CONTINUE;
3828 static int em_sti(struct x86_emulate_ctxt *ctxt)
3830 if (emulator_bad_iopl(ctxt))
3831 return emulate_gp(ctxt, 0);
3833 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3834 ctxt->eflags |= X86_EFLAGS_IF;
3835 return X86EMUL_CONTINUE;
3838 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3840 u32 eax, ebx, ecx, edx;
3842 eax = reg_read(ctxt, VCPU_REGS_RAX);
3843 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3844 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3845 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3846 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3847 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3848 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3849 return X86EMUL_CONTINUE;
3852 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3856	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3857		X86_EFLAGS_SF;
3858	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3860 ctxt->eflags &= ~0xffUL;
3861 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3862 return X86EMUL_CONTINUE;
3865 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3867 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3868 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3869 return X86EMUL_CONTINUE;
3872 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3874	switch (ctxt->op_bytes) {
3875	#ifdef CONFIG_X86_64
3876	case 8:
3877		asm("bswap %0" : "+r"(ctxt->dst.val));
3878		break;
3879	#endif
3880	default:
3881		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3882		break;
3883	}
3884	return X86EMUL_CONTINUE;
3887 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3889 /* emulating clflush regardless of cpuid */
3890 return X86EMUL_CONTINUE;
3893 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3895 ctxt->dst.val = (s32) ctxt->src.val;
3896 return X86EMUL_CONTINUE;
3899 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3901 u32 eax = 1, ebx, ecx = 0, edx;
3903 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3904 if (!(edx & FFL(FXSR)))
3905 return emulate_ud(ctxt);
3907 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3908 return emulate_nm(ctxt);
3911 * Don't emulate a case that should never be hit, instead of working
3912 * around a lack of fxsave64/fxrstor64 on old compilers.
3914 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3915 return X86EMUL_UNHANDLEABLE;
3917 return X86EMUL_CONTINUE;
3921	 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3922	 *  1) 16 bit mode
3923	 *  2) 32 bit mode
3924	 *    - like (1), but FIP and FDP are only 16 bit.  At least Intel CPUs
3925	 *      preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3926	 *      this.
3927 * 3) 64-bit mode with REX.W prefix
3928 * - like (2), but XMM 8-15 are being saved and restored
3929 * 4) 64-bit mode without REX.W prefix
3930 * - like (3), but FIP and FDP are 64 bit
3932 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3933 * desired result. (4) is not emulated.
3935 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3936 * and FPU DS) should match.
3938 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3940 struct fxregs_state fx_state;
3944 rc = check_fxsr(ctxt);
3945 if (rc != X86EMUL_CONTINUE)
3948 ctxt->ops->get_fpu(ctxt);
3950 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3952 ctxt->ops->put_fpu(ctxt);
3954 if (rc != X86EMUL_CONTINUE)
3957 if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
3958 size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
3960 size = offsetof(struct fxregs_state, xmm_space[0]);
3962 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
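/*
 * Illustrative note: xmm_space[] is an array of u32, so the offsetof
 * above, xmm_space[8 * 16/4], marks the end of XMM0-7 (8 registers of
 * 16 bytes, 4 bytes per array element). With CR4.OSFXSR set the
 * write-back therefore covers the legacy x87/MXCSR area plus XMM0-7;
 * with it clear, the copy stops right before XMM0.
 */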
3965 static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
3966 struct fxregs_state *new)
3968 int rc = X86EMUL_CONTINUE;
3969 struct fxregs_state old;
3971 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
3972 if (rc != X86EMUL_CONTINUE)
3976 * 64 bit host will restore XMM 8-15, which is not correct on non-64
3977 * bit guests. Load the current values in order to preserve 64 bit
3978 * XMMs after fxrstor.
3980 #ifdef CONFIG_X86_64
3981 /* XXX: accessing XMM 8-15 very awkwardly */
3982 memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
3986 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
3987 * does save and restore MXCSR.
3989 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
3990 memcpy(new->xmm_space, old.xmm_space, 8 * 16);
3995 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3997 struct fxregs_state fx_state;
4000 rc = check_fxsr(ctxt);
4001 if (rc != X86EMUL_CONTINUE)
4004 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
4005 if (rc != X86EMUL_CONTINUE)
4008 if (fx_state.mxcsr >> 16)
4009 return emulate_gp(ctxt, 0);
4011 ctxt->ops->get_fpu(ctxt);
4013 if (ctxt->mode < X86EMUL_MODE_PROT64)
4014 rc = fxrstor_fixup(ctxt, &fx_state);
4016 if (rc == X86EMUL_CONTINUE)
4017 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4019 ctxt->ops->put_fpu(ctxt);
4024	static bool valid_cr(int nr)
4025	{
4026		/* Only CR0, CR2-CR4 and CR8 exist architecturally. */
4027		return nr == 0 || (nr >= 2 && nr <= 4) || nr == 8;
4028	}
4036 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
4038 if (!valid_cr(ctxt->modrm_reg))
4039 return emulate_ud(ctxt);
4041 return X86EMUL_CONTINUE;
4044 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
4046 u64 new_val = ctxt->src.val64;
4047 int cr = ctxt->modrm_reg;
4050	static u64 cr_reserved_bits[] = {
4051		0xffffffff00000000ULL,
4052		0, 0, 0, /* CR3 checked later */
4053		CR4_RESERVED_BITS,
4054		0, 0, 0,
4055		CR8_RESERVED_BITS,
4056	};
4058	if (!valid_cr(cr))
4059		return emulate_ud(ctxt);
4061	if (new_val & cr_reserved_bits[cr])
4062 return emulate_gp(ctxt, 0);
4067 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
4068 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
4069 return emulate_gp(ctxt, 0);
4071 cr4 = ctxt->ops->get_cr(ctxt, 4);
4072 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4074 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
4075 !(cr4 & X86_CR4_PAE))
4076 return emulate_gp(ctxt, 0);
4083 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4084 if (efer & EFER_LMA)
4085 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
4088 return emulate_gp(ctxt, 0);
4093 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4095 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
4096 return emulate_gp(ctxt, 0);
4102 return X86EMUL_CONTINUE;
4105 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4109 ctxt->ops->get_dr(ctxt, 7, &dr7);
4111 /* Check if DR7.Global_Enable is set */
4112 return dr7 & (1 << 13);
4115 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4117	int dr = ctxt->modrm_reg;
4118	u64 cr4;
4120	if (dr > 7)
4121		return emulate_ud(ctxt);
4123 cr4 = ctxt->ops->get_cr(ctxt, 4);
4124 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4125 return emulate_ud(ctxt);
4127 if (check_dr7_gd(ctxt)) {
4130		ctxt->ops->get_dr(ctxt, 6, &dr6);
4131		dr6 &= ~15;
4132		dr6 |= DR6_BD | DR6_RTM;
4133 ctxt->ops->set_dr(ctxt, 6, dr6);
4134 return emulate_db(ctxt);
4137 return X86EMUL_CONTINUE;
4140 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4142 u64 new_val = ctxt->src.val64;
4143 int dr = ctxt->modrm_reg;
4145 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4146 return emulate_gp(ctxt, 0);
4148 return check_dr_read(ctxt);
4151 static int check_svme(struct x86_emulate_ctxt *ctxt)
4155 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4157 if (!(efer & EFER_SVME))
4158 return emulate_ud(ctxt);
4160 return X86EMUL_CONTINUE;
4163 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4165 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4167 /* Valid physical address? */
4168 if (rax & 0xffff000000000000ULL)
4169 return emulate_gp(ctxt, 0);
4171 return check_svme(ctxt);
4174 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4176 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4178 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4179 return emulate_ud(ctxt);
4181 return X86EMUL_CONTINUE;
4184 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4186 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4187 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4189 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4190 ctxt->ops->check_pmc(ctxt, rcx))
4191 return emulate_gp(ctxt, 0);
4193 return X86EMUL_CONTINUE;
4196 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4198 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4199 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4200 return emulate_gp(ctxt, 0);
4202 return X86EMUL_CONTINUE;
4205 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4207 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4208 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4209 return emulate_gp(ctxt, 0);
4211 return X86EMUL_CONTINUE;
4214 #define D(_y) { .flags = (_y) }
4215 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4216 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4217 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4218 #define N D(NotImpl)
4219 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4220 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4221 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4222 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4223 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4224 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4225 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4226 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4227 #define II(_f, _e, _i) \
4228 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4229 #define IIP(_f, _e, _i, _p) \
4230 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4231 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4232 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4234 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4235 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4236 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4237 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4238 #define I2bvIP(_f, _e, _i, _p) \
4239 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4241 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4242 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4243 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
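/*
 * Illustrative note: F6ALU(Lock, em_add), for example, expands to the
 * six classic ALU encodings in opcode order: r/m8,r8; r/m16/32,r16/32;
 * r8,r/m8; r16/32,r/m16/32; AL,imm8 and AX/EAX,imm16/32, with Lock
 * dropped on the forms that cannot take a LOCK prefix.
 */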
4245 static const struct opcode group7_rm0[] = {
4247 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4251 static const struct opcode group7_rm1[] = {
4252 DI(SrcNone | Priv, monitor),
4253 DI(SrcNone | Priv, mwait),
4257 static const struct opcode group7_rm3[] = {
4258 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4259 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4260 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4261 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4262 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4263 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4264 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4265 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4268 static const struct opcode group7_rm7[] = {
4270 DIP(SrcNone, rdtscp, check_rdtsc),
4274 static const struct opcode group1[] = {
4276 F(Lock | PageTable, em_or),
4279 F(Lock | PageTable, em_and),
4285 static const struct opcode group1A[] = {
4286 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4289 static const struct opcode group2[] = {
4290 F(DstMem | ModRM, em_rol),
4291 F(DstMem | ModRM, em_ror),
4292 F(DstMem | ModRM, em_rcl),
4293 F(DstMem | ModRM, em_rcr),
4294 F(DstMem | ModRM, em_shl),
4295 F(DstMem | ModRM, em_shr),
4296 F(DstMem | ModRM, em_shl),
4297 F(DstMem | ModRM, em_sar),
4300 static const struct opcode group3[] = {
4301 F(DstMem | SrcImm | NoWrite, em_test),
4302 F(DstMem | SrcImm | NoWrite, em_test),
4303 F(DstMem | SrcNone | Lock, em_not),
4304 F(DstMem | SrcNone | Lock, em_neg),
4305 F(DstXacc | Src2Mem, em_mul_ex),
4306 F(DstXacc | Src2Mem, em_imul_ex),
4307 F(DstXacc | Src2Mem, em_div_ex),
4308 F(DstXacc | Src2Mem, em_idiv_ex),
4311 static const struct opcode group4[] = {
4312 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4313 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4317 static const struct opcode group5[] = {
4318 F(DstMem | SrcNone | Lock, em_inc),
4319 F(DstMem | SrcNone | Lock, em_dec),
4320 I(SrcMem | NearBranch, em_call_near_abs),
4321 I(SrcMemFAddr | ImplicitOps, em_call_far),
4322 I(SrcMem | NearBranch, em_jmp_abs),
4323 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4324 I(SrcMem | Stack, em_push), D(Undefined),
4327 static const struct opcode group6[] = {
4328 DI(Prot | DstMem, sldt),
4329 DI(Prot | DstMem, str),
4330 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4331 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4335 static const struct group_dual group7 = { {
4336 II(Mov | DstMem, em_sgdt, sgdt),
4337 II(Mov | DstMem, em_sidt, sidt),
4338 II(SrcMem | Priv, em_lgdt, lgdt),
4339 II(SrcMem | Priv, em_lidt, lidt),
4340 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4341 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4342 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4346 N, EXT(0, group7_rm3),
4347 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4348 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4352 static const struct opcode group8[] = {
4354 F(DstMem | SrcImmByte | NoWrite, em_bt),
4355 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4356 F(DstMem | SrcImmByte | Lock, em_btr),
4357 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4360 static const struct group_dual group9 = { {
4361 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4363 N, N, N, N, N, N, N, N,
4366 static const struct opcode group11[] = {
4367 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4371 static const struct gprefix pfx_0f_ae_7 = {
4372 I(SrcMem | ByteOp, em_clflush), N, N, N,
4375 static const struct group_dual group15 = { {
4376 I(ModRM | Aligned16, em_fxsave),
4377 I(ModRM | Aligned16, em_fxrstor),
4378 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4380 N, N, N, N, N, N, N, N,
4383 static const struct gprefix pfx_0f_6f_0f_7f = {
4384 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4387 static const struct instr_dual instr_dual_0f_2b = {
4391 static const struct gprefix pfx_0f_2b = {
4392 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4395 static const struct gprefix pfx_0f_28_0f_29 = {
4396 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4399 static const struct gprefix pfx_0f_e7 = {
4400 N, I(Sse, em_mov), N, N,
4403 static const struct escape escape_d9 = { {
4404 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4407 N, N, N, N, N, N, N, N,
4409 N, N, N, N, N, N, N, N,
4411 N, N, N, N, N, N, N, N,
4413 N, N, N, N, N, N, N, N,
4415 N, N, N, N, N, N, N, N,
4417 N, N, N, N, N, N, N, N,
4419 N, N, N, N, N, N, N, N,
4421 N, N, N, N, N, N, N, N,
4424 static const struct escape escape_db = { {
4425 N, N, N, N, N, N, N, N,
4428 N, N, N, N, N, N, N, N,
4430 N, N, N, N, N, N, N, N,
4432 N, N, N, N, N, N, N, N,
4434 N, N, N, N, N, N, N, N,
4436 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4438 N, N, N, N, N, N, N, N,
4440 N, N, N, N, N, N, N, N,
4442 N, N, N, N, N, N, N, N,
4445 static const struct escape escape_dd = { {
4446 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4449 N, N, N, N, N, N, N, N,
4451 N, N, N, N, N, N, N, N,
4453 N, N, N, N, N, N, N, N,
4455 N, N, N, N, N, N, N, N,
4457 N, N, N, N, N, N, N, N,
4459 N, N, N, N, N, N, N, N,
4461 N, N, N, N, N, N, N, N,
4463 N, N, N, N, N, N, N, N,
4466 static const struct instr_dual instr_dual_0f_c3 = {
4467 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4470 static const struct mode_dual mode_dual_63 = {
4471 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4474 static const struct opcode opcode_table[256] = {
4476 F6ALU(Lock, em_add),
4477 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4478 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4480 F6ALU(Lock | PageTable, em_or),
4481 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4484 F6ALU(Lock, em_adc),
4485 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4486 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4488 F6ALU(Lock, em_sbb),
4489 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4490 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4492 F6ALU(Lock | PageTable, em_and), N, N,
4494 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4496 F6ALU(Lock, em_xor), N, N,
4498 F6ALU(NoWrite, em_cmp), N, N,
4500 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4502 X8(I(SrcReg | Stack, em_push)),
4504 X8(I(DstReg | Stack, em_pop)),
4506 I(ImplicitOps | Stack | No64, em_pusha),
4507 I(ImplicitOps | Stack | No64, em_popa),
4508 N, MD(ModRM, &mode_dual_63),
4511 I(SrcImm | Mov | Stack, em_push),
4512 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4513 I(SrcImmByte | Mov | Stack, em_push),
4514 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4515 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4516 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4518 X16(D(SrcImmByte | NearBranch)),
4520 G(ByteOp | DstMem | SrcImm, group1),
4521 G(DstMem | SrcImm, group1),
4522 G(ByteOp | DstMem | SrcImm | No64, group1),
4523 G(DstMem | SrcImmByte, group1),
4524 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4525 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4527 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4528 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4529 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4530 D(ModRM | SrcMem | NoAccess | DstReg),
4531 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4534 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4536 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4537 I(SrcImmFAddr | No64, em_call_far), N,
4538 II(ImplicitOps | Stack, em_pushf, pushf),
4539 II(ImplicitOps | Stack, em_popf, popf),
4540 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4542 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4543 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4544 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4545 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4547 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4548 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4549 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4550 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4552 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4554 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4556 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4557 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4558 I(ImplicitOps | NearBranch, em_ret),
4559 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4560 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4561 G(ByteOp, group11), G(0, group11),
4563 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4564 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4565 I(ImplicitOps, em_ret_far),
4566 D(ImplicitOps), DI(SrcImmByte, intn),
4567 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4569 G(Src2One | ByteOp, group2), G(Src2One, group2),
4570 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4571 I(DstAcc | SrcImmUByte | No64, em_aam),
4572 I(DstAcc | SrcImmUByte | No64, em_aad),
4573 F(DstAcc | ByteOp | No64, em_salc),
4574 I(DstAcc | SrcXLat | ByteOp, em_mov),
4576 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4578 X3(I(SrcImmByte | NearBranch, em_loop)),
4579 I(SrcImmByte | NearBranch, em_jcxz),
4580 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4581 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4583 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4584 I(SrcImmFAddr | No64, em_jmp_far),
4585 D(SrcImmByte | ImplicitOps | NearBranch),
4586 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4587 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4589 N, DI(ImplicitOps, icebp), N, N,
4590 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4591 G(ByteOp, group3), G(0, group3),
4593 D(ImplicitOps), D(ImplicitOps),
4594 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4595 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
static const struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	G(0, group6), GD(0, &group7), N, N,
	N, I(ImplicitOps | EmulateOnUD, em_syscall),
	II(ImplicitOps | Priv, em_clts, clts), N,
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	/* 0x10 - 0x1F */
	N, N, N, N, N, N, N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess),
	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
	/* 0x20 - 0x2F */
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
						check_cr_write),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
						check_dr_write),
	N, N, N, N,
	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
	N, N, N, N,
	/* 0x30 - 0x3F */
	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
	I(ImplicitOps | EmulateOnUD, em_sysenter),
	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
	N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x70 - 0x7F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x80 - 0x8F */
	X16(D(SrcImm | NearBranch)),
	/* 0x90 - 0x9F */
	X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	II(ImplicitOps, em_cpuid, cpuid),
	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8),
	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	I(DstReg | SrcMem | ModRM, em_bsf_c),
	I(DstReg | SrcMem | ModRM, em_bsr_c),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
	N, ID(0, &instr_dual_0f_c3),
	N, N, N, GD(0, &group9),
	/* 0xC8 - 0xCF */
	X8(I(DstReg, em_bswap)),
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};

static const struct instr_dual instr_dual_0f_38_f0 = {
	I(DstReg | SrcMem | Mov, em_movbe), N
};

static const struct instr_dual instr_dual_0f_38_f1 = {
	I(DstMem | SrcReg | Mov, em_movbe), N
};

static const struct gprefix three_byte_0f_38_f0 = {
	ID(0, &instr_dual_0f_38_f0), N, N, N
};

static const struct gprefix three_byte_0f_38_f1 = {
	ID(0, &instr_dual_0f_38_f1), N, N, N
};

/*
 * Insns below are selected by the prefix, which is indexed by the third
 * opcode byte.
 */
static const struct opcode opcode_map_0f_38[256] = {
	/* 0x00 - 0x7f */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0x80 - 0xef */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0xf0 - 0xf1 */
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
	/* 0xf2 - 0xff */
	N, N, X4(N), X8(N)
};
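
/*
 * Editor's sketch (not from the original file): the lookup path a
 * MOVBE (0F 38 F0 /r) takes through the tables above, assuming the
 * decode loop in x86_decode_insn() below.  The 0F 38 map yields a GP
 * entry, the gprefix picks a slot by mandatory prefix (none vs.
 * 0x66/0xf2/0xf3), and the instr_dual finally selects em_movbe for the
 * memory form and N (undefined) for the mod==3 register form.  The
 * "example_" name is hypothetical.
 */
static inline struct opcode example_lookup_0f_38(u8 third_byte)
{
	/* the same indexing x86_decode_insn() performs for 0F 38 xx */
	return opcode_map_0f_38[third_byte];
}
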
static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
{
	unsigned size;

	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	if (size == 8)
		size = 4;
	return size;
}

static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
		      unsigned size, bool sign_extension)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_IMM;
	op->bytes = size;
	op->addr.mem.ea = ctxt->_eip;
	/* NB. Immediates are sign-extended as necessary. */
	switch (op->bytes) {
	case 1:
		op->val = insn_fetch(s8, ctxt);
		break;
	case 2:
		op->val = insn_fetch(s16, ctxt);
		break;
	case 4:
		op->val = insn_fetch(s32, ctxt);
		break;
	case 8:
		op->val = insn_fetch(s64, ctxt);
		break;
	}
	if (!sign_extension) {
		switch (op->bytes) {
		case 1:
			op->val &= 0xff;
			break;
		case 2:
			op->val &= 0xffff;
			break;
		case 4:
			op->val &= 0xffffffff;
			break;
		}
	}
done:
	return rc;
}
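
/*
 * Editor's sketch (not from the original file): what the two switches
 * in decode_imm() compute for a one-byte immediate.  The fetch is
 * always sign-extending; zero extension is obtained afterwards by
 * masking back down to the operand size.  The "example_" name is
 * hypothetical.
 */
static inline u64 example_extend_imm8(u8 raw, bool sign_extension)
{
	s64 val = (s8)raw;	/* insn_fetch(s8, ...) sign-extends */

	if (!sign_extension)
		val &= 0xff;	/* zero-extended forms mask the result */
	return val;		/* 0x80 -> 0xffff...ff80 or 0x0000...0080 */
}
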
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op);
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if (ctxt->d & BitOp)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RDI);
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		op->count = 1;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		break;
	case OpCL:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		if (ctxt->memop.type == OP_REG) {
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
			fetch_register_operand(&ctxt->memop);
		}
		goto mem_common;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RSI);
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		op->count = 1;
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			address_mask(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
					(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES:
		op->type = OP_IMM;
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_GS;
		break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}
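
/*
 * Editor's sketch (not from the original file): the effective address
 * the OpXLat case above builds for XLAT, before segmentation.  Only
 * the low byte of RAX participates, matching the architectural
 * DS:[(E/R)BX + AL] form; address_mask() then truncates the sum to the
 * current address size.  The "example_" name is hypothetical.
 */
static inline ulong example_xlat_ea(ulong rbx, ulong rax)
{
	return rbx + (rax & 0xff);	/* caller applies address_mask() */
}
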
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = (ctxt->b >> 3) & 3;
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = ctxt->b & 7;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* VEX-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}

	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf)
				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
			else
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked
		 * unconditionally in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}

	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
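
/*
 * Editor's sketch (not from the original file): why the prefix decoding
 * above can apply 0x66/0x67 with an XOR.  Each legal size pair differs
 * in exactly the bits of the mask, so one operation toggles both ways:
 * 2 ^ 6 == 4 and 4 ^ 6 == 2; likewise 4 ^ 12 == 8 and 8 ^ 12 == 4.
 * The "example_" names are hypothetical.
 */
static inline unsigned int example_toggle_op_size(unsigned int def_op_bytes)
{
	return def_op_bytes ^ 6;	/* 0x66: 2 <-> 4 */
}

static inline unsigned int example_toggle_ad_size_64(unsigned int def_ad_bytes)
{
	return def_ad_bytes ^ 12;	/* 0x67 in long mode: 4 <-> 8 */
}
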
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * The second termination condition only applies to the REPE and
	 * REPNE prefixes.  If the repeat-string prefix is REPE/REPZ or
	 * REPNE/REPNZ, check the corresponding termination condition:
	 * - if REPE/REPZ and ZF = 0 then done
	 * - if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}
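
/*
 * Editor's sketch (not from the original file): the ZF half of the test
 * above as a truth table, assuming the opcode has already been checked
 * to be one of CMPS/SCAS.  The "example_" name is hypothetical.
 */
static inline bool example_repz_done(u8 rep_prefix, bool zf)
{
	return (rep_prefix == REPE_PREFIX && !zf) ||	/* repe:  stop on ZF=0 */
	       (rep_prefix == REPNE_PREFIX && zf);	/* repne: stop on ZF=1 */
}
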
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	ctxt->ops->get_fpu(ctxt);
	rc = asm_safe("fwait");
	ctxt->ops->put_fpu(ctxt);

	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
				       struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [fastop]"+S"(fop)
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
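
/*
 * Editor's sketch (not from the original file): the table-stride
 * arithmetic used by fastop() above.  Each fastop group lays out its
 * 1/2/4/8-byte variants FASTOP_SIZE bytes apart, so the operand width
 * selects an entry via log2(bytes); byte ops use entry 0 and skip the
 * adjustment.  The "example_" name is hypothetical.
 */
static inline unsigned example_fastop_offset(unsigned dst_bytes)
{
	/* 1, 2, 4, 8 bytes -> 0, 1, 2, 3 times FASTOP_SIZE */
	return __ffs(dst_bytes) * FASTOP_SIZE;
}
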
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}
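
/*
 * Editor's sketch (not from the original file): the memset() range
 * trick init_decode_cache() relies on, shown on a hypothetical struct.
 * Every field declared between the two markers is cleared, so the code
 * stays correct when fields are added in between, but it silently
 * depends on declaration order.
 */
struct example_cache {
	bool first;	/* start of the cleared region */
	u8 middle_a;
	u8 middle_b;
	u8 last;	/* first field that is NOT cleared */
};

static inline void example_clear_cache(struct example_cache *c)
{
	/* clears first..middle_b and leaves last untouched */
	memset(&c->first, 0, (void *)&c->last - (void *)&c->first);
}
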
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(ctxt, &ctxt->src);
			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
		}

		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				    &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;
			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore dst type in case the decoding will be reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually this happens
				 * before decode, but since the instruction
				 * is restarted we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc. */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:	/* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
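
/*
 * Editor's sketch (not from the original file): the widening chain
 * behind the 0x98 (cbw/cwde/cdqe) case above.  Each operand size
 * sign-extends the half-sized low part of the accumulator; the real
 * code stores the signed cast and lets the destination width mask it.
 * The "example_" name is hypothetical.
 */
static inline u64 example_cbw_chain(u64 val, unsigned int op_bytes)
{
	switch (op_bytes) {
	case 2:
		return (u16)(s8)val;	/* cbw:  AL  -> AX  */
	case 4:
		return (u32)(s16)val;	/* cwde: AX  -> EAX */
	case 8:
		return (u64)(s32)val;	/* cdqe: EAX -> RAX */
	}
	return val;
}
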
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}
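
/*
 * Editor's sketch (not from the original file): roughly how a caller is
 * expected to drive the entry points above, modelled loosely on
 * x86_emulate_instruction() in KVM's x86.c.  Fault reporting, retries
 * and the many special cases are omitted; example_emulate() and its
 * exact call ordering are illustrative assumptions.
 */
static inline int example_emulate(struct x86_emulate_ctxt *ctxt,
				  void *insn, int insn_len)
{
	int r;

	init_decode_cache(ctxt);
	r = x86_decode_insn(ctxt, insn, insn_len);
	if (r != EMULATION_OK)
		return r;

	do {
		/* string insns may ask to be restarted with fresh state */
		r = x86_emulate_insn(ctxt);
	} while (r == EMULATION_RESTART);

	return r;	/* EMULATION_OK, EMULATION_FAILED or _INTERCEPTED */
}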