1 /******************************************************************************
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 * Copyright (c) 2005 Keir Fraser
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
11 * Copyright (C) 2006 Qumranet
13 * Avi Kivity <avi@qumranet.com>
14 * Yaniv Kamay <yaniv@qumranet.com>
16 * This work is licensed under the terms of the GNU GPL, version 2. See
17 * the COPYING file in the top-level directory.
19 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
25 #include <public/xen.h>
26 #define DPRINTF(_f, _a ...) printf(_f , ## _a)
28 #include <linux/kvm_host.h>
29 #include "kvm_cache_regs.h"
30 #define DPRINTF(x...) do {} while (0)
32 #include <linux/module.h>
33 #include <asm/kvm_emulate.h>
39 * Opcode effective-address decode tables.
40 * Note that we only emulate instructions that have at least one memory
41 * operand (excluding implicit stack references). We assume that stack
42 * references and instruction fetches will never occur in special memory
43 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
/*
 * Decode-flag bits. Entries of the u32 decode tables are ORs of these values
 * (e.g. ByteOp | DstMem | SrcReg | ...), and x86_decode_insn() tests them via
 * c->d.
 * NOTE(review): ModRM, Mov, BitOp and No64 are used by the tables but their
 * definitions are not visible in this excerpt.
 */
47 /* Operand sizes: 8-bit operands or specified/overridden size. */
48 #define ByteOp (1<<0) /* 8-bit operands. */
49 /* Destination operand type. */
50 #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
51 #define DstReg (2<<1) /* Register operand. */
52 #define DstMem (3<<1) /* Memory operand. */
53 #define DstAcc (4<<1) /* Destination Accumulator */
54 #define DstDI (5<<1) /* Destination is in ES:(E)DI */
55 #define DstMem64 (6<<1) /* 64bit memory operand */
56 #define DstMask (7<<1) /* Selects the destination-type field (bits 1:3). */
57 /* Source operand type. */
58 #define SrcNone (0<<4) /* No source operand. */
59 #define SrcImplicit (0<<4) /* Source operand is implicit in the opcode (same encoding as SrcNone). */
60 #define SrcReg (1<<4) /* Register operand. */
61 #define SrcMem (2<<4) /* Memory operand. */
62 #define SrcMem16 (3<<4) /* Memory operand (16-bit). */
63 #define SrcMem32 (4<<4) /* Memory operand (32-bit). */
64 #define SrcImm (5<<4) /* Immediate operand. */
65 #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
66 #define SrcOne (7<<4) /* Implied '1' */
67 #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
68 #define SrcImmU (9<<4) /* Immediate operand, unsigned */
69 #define SrcSI (0xa<<4) /* Source is in the DS:RSI */
70 #define SrcMask (0xf<<4) /* Selects the source-type field (bits 4:7). */
71 /* Generic ModRM decode. */
73 /* Destination is only written; never read. */
76 #define MemAbs (1<<11) /* Memory operand is absolute displacement */
77 #define String (1<<12) /* String instruction (rep capable) */
78 #define Stack (1<<13) /* Stack instruction (push/pop) */
79 #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
80 #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
81 #define GroupMask 0xff /* Group number stored in bits 0:7 */
83 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
84 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
86 /* Source 2 operand type */
87 #define Src2None (0<<29)
88 #define Src2CL (1<<29)
89 #define Src2ImmByte (2<<29)
90 #define Src2One (3<<29)
91 #define Src2Imm16 (4<<29)
92 #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be
93 in memory and second argument is located
94 immediately after the first one in memory. */
95 #define Src2Mask (7<<29) /* Selects the second-source field (bits 29:31). */
98 Group1_80, Group1_81, Group1_82, Group1_83,
99 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
/*
 * Decode flags for one-byte opcodes. x86_decode_insn() indexes this table
 * with the opcode byte (c->d = opcode_table[c->b]).
 * The range labels on the first five rows come from counting eight entries
 * per row from the start of the initializer; later rows are left unlabelled
 * because this excerpt has gaps in the row sequence.
 */
103 static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
105 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
106 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
107 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
108 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
110 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
111 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
112 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
113 ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
115 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
116 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
117 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
118 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
120 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
121 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
122 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
123 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
125 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
126 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
127 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
129 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
130 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
133 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
134 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
137 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
138 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
139 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
142 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
144 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
146 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
147 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
149 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
150 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
152 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
153 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
156 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
157 DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
158 SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
160 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
161 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
163 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
164 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
166 Group | Group1_80, Group | Group1_81,
167 Group | Group1_82, Group | Group1_83,
168 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
169 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
171 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
172 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
173 DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
174 DstReg | SrcMem | ModRM | Mov, Group | Group1A,
176 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
178 0, 0, SrcImm | Src2Imm16 | No64, 0,
179 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
181 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
182 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
183 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
184 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
186 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
187 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
188 ByteOp | DstDI | String, DstDI | String,
190 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
191 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
192 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
193 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
195 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
196 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
197 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
198 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
200 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
201 0, ImplicitOps | Stack, 0, 0,
202 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
204 0, 0, 0, ImplicitOps | Stack,
205 ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
207 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
208 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
211 0, 0, 0, 0, 0, 0, 0, 0,
214 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
215 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
217 SrcImm | Stack, SrcImm | ImplicitOps,
218 SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
219 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
220 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
223 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
225 ImplicitOps, 0, ImplicitOps, ImplicitOps,
226 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
/*
 * Decode flags for two-byte opcodes. x86_decode_insn() fetches a second
 * opcode byte into c->b and indexes this table with it
 * (c->d = twobyte_table[c->b]).
 */
229 static u32 twobyte_table[256] = {
231 0, Group | GroupDual | Group7, 0, 0,
232 0, ImplicitOps, ImplicitOps | Priv, 0,
233 ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
234 0, ImplicitOps | ModRM, 0, 0,
236 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
238 ModRM | ImplicitOps | Priv, ModRM | Priv,
239 ModRM | ImplicitOps | Priv, ModRM | Priv,
241 0, 0, 0, 0, 0, 0, 0, 0,
243 ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
244 ImplicitOps, ImplicitOps | Priv, 0, 0,
245 0, 0, 0, 0, 0, 0, 0, 0,
247 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
248 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
249 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
250 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
252 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
253 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
254 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
255 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
257 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
259 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
261 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
263 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
264 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268 ImplicitOps | Stack, ImplicitOps | Stack,
269 0, DstMem | SrcReg | ModRM | BitOp,
270 DstMem | SrcReg | Src2ImmByte | ModRM,
271 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
273 ImplicitOps | Stack, ImplicitOps | Stack,
274 0, DstMem | SrcReg | ModRM | BitOp | Lock,
275 DstMem | SrcReg | Src2ImmByte | ModRM,
276 DstMem | SrcReg | Src2CL | ModRM,
279 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
280 0, DstMem | SrcReg | ModRM | BitOp | Lock,
281 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
282 DstReg | SrcMem16 | ModRM | Mov,
285 Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
286 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
287 DstReg | SrcMem16 | ModRM | Mov,
289 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
290 0, 0, 0, Group | GroupDual | Group9,
291 0, 0, 0, 0, 0, 0, 0, 0,
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/*
 * Decode flags for opcode groups. x86_decode_insn() computes the index as
 * (group number << 3) + ModRM bits 3:5, i.e. eight consecutive entries per
 * group, and uses this table unless GroupDual is set and ModRM.mod == 3
 * (in which case group2_table is used instead).
 */
300 static u32 group_table[] = {
302 ByteOp | DstMem | SrcImm | ModRM | Lock,
303 ByteOp | DstMem | SrcImm | ModRM | Lock,
304 ByteOp | DstMem | SrcImm | ModRM | Lock,
305 ByteOp | DstMem | SrcImm | ModRM | Lock,
306 ByteOp | DstMem | SrcImm | ModRM | Lock,
307 ByteOp | DstMem | SrcImm | ModRM | Lock,
308 ByteOp | DstMem | SrcImm | ModRM | Lock,
309 ByteOp | DstMem | SrcImm | ModRM,
311 DstMem | SrcImm | ModRM | Lock,
312 DstMem | SrcImm | ModRM | Lock,
313 DstMem | SrcImm | ModRM | Lock,
314 DstMem | SrcImm | ModRM | Lock,
315 DstMem | SrcImm | ModRM | Lock,
316 DstMem | SrcImm | ModRM | Lock,
317 DstMem | SrcImm | ModRM | Lock,
318 DstMem | SrcImm | ModRM,
320 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
321 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
322 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
323 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
324 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
325 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
326 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
327 ByteOp | DstMem | SrcImm | ModRM | No64,
329 DstMem | SrcImmByte | ModRM | Lock,
330 DstMem | SrcImmByte | ModRM | Lock,
331 DstMem | SrcImmByte | ModRM | Lock,
332 DstMem | SrcImmByte | ModRM | Lock,
333 DstMem | SrcImmByte | ModRM | Lock,
334 DstMem | SrcImmByte | ModRM | Lock,
335 DstMem | SrcImmByte | ModRM | Lock,
336 DstMem | SrcImmByte | ModRM,
338 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
340 ByteOp | SrcImm | DstMem | ModRM, 0,
341 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
344 DstMem | SrcImm | ModRM, 0,
345 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
348 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
351 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
352 SrcMem | ModRM | Stack, 0,
353 SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
354 SrcMem | ModRM | Stack, 0,
356 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
357 SrcNone | ModRM | DstMem | Mov, 0,
358 SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
361 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
362 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
364 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
/*
 * Alternate group decode flags, indexed the same way as group_table.
 * x86_decode_insn() reads this table when the opcode has GroupDual set and
 * the ModRM byte has mod == 3.
 */
367 static u32 group2_table[] = {
369 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
370 SrcNone | ModRM | DstMem | Mov, 0,
371 SrcMem16 | ModRM | Mov | Priv, 0,
373 0, 0, 0, 0, 0, 0, 0, 0,
376 /* EFLAGS bit definitions. Each is a single-bit mask except EFLG_IOPL. */
377 #define EFLG_ID (1<<21)
378 #define EFLG_VIP (1<<20)
379 #define EFLG_VIF (1<<19)
380 #define EFLG_AC (1<<18)
381 #define EFLG_VM (1<<17)
382 #define EFLG_RF (1<<16)
383 #define EFLG_IOPL (3<<12) /* two-bit field, bits 12:13 */
384 #define EFLG_NT (1<<14)
385 #define EFLG_OF (1<<11)
386 #define EFLG_DF (1<<10)
387 #define EFLG_IF (1<<9)
388 #define EFLG_TF (1<<8)
389 #define EFLG_SF (1<<7)
390 #define EFLG_ZF (1<<6)
391 #define EFLG_AF (1<<4)
392 #define EFLG_PF (1<<2)
393 #define EFLG_CF (1<<0)
396 * Instruction emulation:
397 * Most instructions are emulated directly via a fragment of inline assembly
398 * code. This allows us to save/restore EFLAGS and thus very easily pick up
399 * any modified flags.
/*
 * String fragments spliced into the inline-asm templates below.
 * _LO32 is pasted between '%' and an operand number (see _PRE_EFLAGS), so it
 * acts as an operand modifier; _STK names the stack pointer register.
 */
402 #if defined(CONFIG_X86_64)
403 #define _LO32 "k" /* force 32-bit operand */
404 #define _STK "%%rsp" /* stack pointer */
405 #elif defined(__i386__)
406 #define _LO32 "" /* empty modifier; presumably operands are already 32-bit here */
407 #define _STK "%%esp" /* stack pointer */
411 * These EFLAGS bits are restored from saved value during emulation, and
412 * any changes are written back to the saved value after emulation.
/* The arithmetic status flags: OF, SF, ZF, AF, PF and CF. */
414 #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
416 /* Before executing instruction: restore necessary bits in EFLAGS. _sav, _msk and _tmp are asm operand numbers passed as strings (e.g. "0", "4", "2"). NOTE(review): several instructions of this sequence are not visible in this excerpt. */
417 #define _PRE_EFLAGS(_sav, _msk, _tmp) \
418 /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
419 "movl %"_sav",%"_LO32 _tmp"; " \
422 "movl %"_msk",%"_LO32 _tmp"; " \
423 "andl %"_LO32 _tmp",("_STK"); " \
425 "notl %"_LO32 _tmp"; " \
426 "andl %"_LO32 _tmp",("_STK"); " \
427 "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
429 "orl %"_LO32 _tmp",("_STK"); " \
433 /* After executing instruction: write-back necessary bits in EFLAGS. Takes the same operand-number strings as _PRE_EFLAGS. */
434 #define _POST_EFLAGS(_sav, _msk, _tmp) \
435 /* _sav |= EFLAGS & _msk; */ \
438 "andl %"_msk",%"_LO32 _tmp"; " \
439 "orl %"_LO32 _tmp",%"_sav"; "
/*
 * Execute "<_op><_suffix> src,dst" on the host between _PRE_EFLAGS and
 * _POST_EFLAGS.
 * Asm operands: %0 = _eflags (memory), %1 = (_dst).val (memory),
 * %3 = (_src).val with constraint _y and printed with operand modifier _x,
 * %4 = EFLAGS_MASK immediate. %2 is used as the scratch register by the
 * flag macros; its declaration is not visible in this excerpt (presumably
 * the _tmp local of the calling macro).
 */
447 #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
449 __asm__ __volatile__ ( \
450 _PRE_EFLAGS("0", "4", "2") \
451 _op _suffix " %"_x"3,%1; " \
452 _POST_EFLAGS("0", "4", "2") \
453 : "=m" (_eflags), "=m" ((_dst).val), \
455 : _y ((_src).val), "i" (EFLAGS_MASK)); \
459 /* Raw emulation: instruction has two explicit operands. Dispatches on (_dst).bytes to the "w", "l" or (via ON64) "q" form; each width has its own operand modifier/constraint pair (_wx/_wy, _lx/_ly, _qx/_qy). There is no byte-sized form. */
460 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
462 unsigned long _tmp; \
464 switch ((_dst).bytes) { \
466 ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
469 ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
472 ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
/*
 * Two-operand emulation including the byte-sized form: uses the "b" suffix
 * with _bx/_by for one value of (_dst).bytes and hands the other sizes to
 * __emulate_2op_nobyte. The case labels are not visible in this excerpt.
 */
477 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
479 unsigned long _tmp; \
480 switch ((_dst).bytes) { \
482 ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
485 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
486 _wx, _wy, _lx, _ly, _qx, _qy); \
491 /* Source operand is byte-sized and may be restricted to just %cl. Every width uses operand modifier "b" and constraint "c". */
492 #define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
493 __emulate_2op(_op, _src, _dst, _eflags, \
494 "b", "c", "b", "c", "b", "c", "b", "c")
496 /* Source operand is byte, word, long or quad sized. The source is printed at the same width as the destination ("b", "w", _LO32, or unmodified). */
497 #define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
498 __emulate_2op(_op, _src, _dst, _eflags, \
499 "b", "q", "w", "r", _LO32, "r", "", "r")
501 /* Source operand is word, long or quad sized. */
502 #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
503 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
504 "w", "r", _LO32, "r", "", "r")
506 /* Instruction has three operands and one operand is stored in ECX register. The three values are copied into locals of type _type, the instruction runs on those (%1 = destination in a register, %4 = source in a register, count pinned to ECX by the "c" constraint), and all three are written back to the operands afterwards. */
507 #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
509 unsigned long _tmp; \
510 _type _clv = (_cl).val; \
511 _type _srcv = (_src).val; \
512 _type _dstv = (_dst).val; \
514 __asm__ __volatile__ ( \
515 _PRE_EFLAGS("0", "5", "2") \
516 _op _suffix " %4,%1 \n" \
517 _POST_EFLAGS("0", "5", "2") \
518 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
519 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
522 (_cl).val = (unsigned long) _clv; \
523 (_src).val = (unsigned long) _srcv; \
524 (_dst).val = (unsigned long) _dstv; \
/*
 * Width dispatch for __emulate_2op_cl on (_dst).bytes: "w" with unsigned
 * short, "l" with unsigned int, and (via ON64) "q" with unsigned long.
 * No byte-sized form is provided.
 */
527 #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
529 switch ((_dst).bytes) { \
531 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
532 "w", unsigned short); \
535 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
536 "l", unsigned int); \
539 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
540 "q", unsigned long)); \
/*
 * Execute the one-operand instruction "<_op><_suffix>" on (_dst).val in
 * memory (read-write, "+m"), between _PRE_EFLAGS and _POST_EFLAGS.
 * %0 = _eflags, %1 = (_dst).val, %3 = EFLAGS_MASK immediate; %2 is the
 * scratch operand, whose declaration is not visible in this excerpt.
 */
545 #define __emulate_1op(_op, _dst, _eflags, _suffix) \
547 unsigned long _tmp; \
549 __asm__ __volatile__ ( \
550 _PRE_EFLAGS("0", "3", "2") \
551 _op _suffix " %1; " \
552 _POST_EFLAGS("0", "3", "2") \
553 : "=m" (_eflags), "+m" ((_dst).val), \
555 : "i" (EFLAGS_MASK)); \
558 /* Instruction has only one explicit operand (no source operand). Picks the b/w/l/q suffix from (_dst).bytes; the 8-byte form is wrapped in ON64. */
559 #define emulate_1op(_op, _dst, _eflags) \
561 switch ((_dst).bytes) { \
562 case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
563 case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
564 case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
565 case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
569 /* Fetch next part of the instruction being emulated. Reads _size bytes at _eip through do_insn_fetch() into a local. Relies on `rc`, `ctxt` and `ops` being in scope at the expansion site and assigns the fetch status to `rc`. NOTE(review): the failure action and the value the macro yields are not visible in this excerpt. */
570 #define insn_fetch(_type, _size, _eip) \
571 ({ unsigned long _x; \
572 rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
573 if (rc != X86EMUL_CONTINUE) \
/*
 * Mask with the low (ad_bytes * 8) bits set, i.e. the bits that take part in
 * an address of the current address size. The shift count equals the width of
 * unsigned long when ad_bytes == sizeof(unsigned long); the callers visible
 * here (address_mask, register_address_increment) test for that case first.
 */
579 static inline unsigned long ad_mask(struct decode_cache *c)
581 return (1UL << (c->ad_bytes << 3)) - 1;
584 /* Access/update address held in a register, based on addressing mode. */
/*
 * Truncate reg to the current address size. The native-width case is tested
 * separately; its branch body is not visible in this excerpt (presumably
 * returns reg unchanged).
 */
585 static inline unsigned long
586 address_mask(struct decode_cache *c, unsigned long reg)
588 if (c->ad_bytes == sizeof(unsigned long))
591 return reg & ad_mask(c);
/* Linear address: base plus reg truncated to the current address size. */
594 static inline unsigned long
595 register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
597 return base + address_mask(c, reg);
/*
 * Add inc to *reg with wrap-around at the current address size: only the
 * low ad_bytes * 8 bits change, the upper bits of *reg are preserved.
 * The native-width branch body is not visible in this excerpt.
 */
601 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
603 if (c->ad_bytes == sizeof(unsigned long))
606 *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
/* Relative jump: advance c->eip by rel, wrapping at the current address size. */
609 static inline void jmp_rel(struct decode_cache *c, int rel)
611 register_address_increment(c, &c->eip, rel);
/* Record seg as the segment override for the instruction being decoded. */
614 static void set_seg_override(struct decode_cache *c, int seg)
616 c->has_seg_override = true;
617 c->seg_override = seg;
/*
 * Base address of segment register seg. In 64-bit mode, segments numbered
 * below VCPU_SREG_FS take a separate path whose body is not visible in this
 * excerpt (presumably a zero base); otherwise the base comes from
 * kvm_x86_ops->get_segment_base().
 */
620 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
622 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
625 return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
/*
 * Base of the segment selected by the current override. The no-override
 * branch body is not visible in this excerpt.
 */
628 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
629 struct decode_cache *c)
631 if (!c->has_seg_override)
634 return seg_base(ctxt, c->seg_override);
/* Base address of the ES segment. */
637 static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
639 return seg_base(ctxt, VCPU_SREG_ES);
/* Base address of the SS segment. */
642 static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
644 return seg_base(ctxt, VCPU_SREG_SS);
/*
 * Return in *dest the instruction byte at eip, using the per-instruction
 * fetch cache in ctxt->decode.fetch. When eip is exactly the end of the
 * cached range, the cache is extended first through ops->fetch().
 */
647 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
648 struct x86_emulate_ops *ops,
649 unsigned long eip, u8 *dest)
651 struct fetch_cache *fc = &ctxt->decode.fetch;
655 if (eip == fc->end) {
656 cur_size = fc->end - fc->start;
/* Fetch at most up to 15 cached bytes in total and never past the end of eip's page. */
657 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
658 rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
659 size, ctxt->vcpu, NULL);
660 if (rc != X86EMUL_CONTINUE)
664 *dest = fc->data[eip - fc->start];
665 return X86EMUL_CONTINUE;
/*
 * Copy size instruction bytes starting at eip into dest, one byte at a time
 * via do_fetch_insn_byte(). Returns X86EMUL_UNHANDLEABLE if the fetch would
 * extend more than 15 bytes past ctxt->eip, X86EMUL_CONTINUE on success.
 * The loop header is not visible in this excerpt.
 */
668 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
669 struct x86_emulate_ops *ops,
670 unsigned long eip, void *dest, unsigned size)
674 /* x86 instructions are limited to 15 bytes. */
675 if (eip + size - ctxt->eip > 15)
676 return X86EMUL_UNHANDLEABLE;
678 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
679 if (rc != X86EMUL_CONTINUE)
682 return X86EMUL_CONTINUE;
686 * Given the 'reg' portion of a ModRM byte, and a register block, return a
687 * pointer into the block that addresses the relevant register.
688 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
/*
 * Map a register number to a pointer into the register block regs.
 * By default the pointer addresses the whole slot regs[modrm_reg]. When
 * highbyte_regs is set and modrm_reg is 4..7, it instead addresses byte 1 of
 * regs[modrm_reg & 3] (the AH/CH/DH/BH encodings).
 * The '&' address-of operators had been mis-encoded as a (R) sign in this
 * copy; they are restored here.
 */
690 static void *decode_register(u8 modrm_reg, unsigned long *regs,
695 p = &regs[modrm_reg];
696 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
697 p = (unsigned char *)&regs[modrm_reg & 3] + 1;
/*
 * Read a descriptor-table pointer through ops->read_std(): a 2-byte value at
 * ptr into *size, then op_bytes bytes at ptr + 2 into *address.
 * The declaration of ptr and the trailing read_std arguments are not visible
 * in this excerpt.
 */
701 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
702 struct x86_emulate_ops *ops,
704 u16 *size, unsigned long *address, int op_bytes)
711 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
713 if (rc != X86EMUL_CONTINUE)
715 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
/*
 * Evaluate an x86 condition code against an EFLAGS value.
 * Bits 3:1 of condition select which flag test is applied; bit 0 inverts the
 * result. Returns non-zero when the condition holds.
 * Most case labels are not visible in this excerpt.
 */
720 static int test_cc(unsigned int condition, unsigned int flags)
724 switch ((condition & 15) >> 1) {
726 rc |= (flags & EFLG_OF);
728 case 1: /* b/c/nae */
729 rc |= (flags & EFLG_CF);
732 rc |= (flags & EFLG_ZF);
/* Either CF or ZF set. */
735 rc |= (flags & (EFLG_CF|EFLG_ZF));
738 rc |= (flags & EFLG_SF);
741 rc |= (flags & EFLG_PF);
744 rc |= (flags & EFLG_ZF);
/* SF and OF differ. */
747 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
751 /* Odd condition identifiers (lsb == 1) have inverted sense. */
752 return (!!rc ^ (condition & 1));
/*
 * Fill *op for a register operand: op->ptr points into c->regs, op->val and
 * op->orig_val hold the current register contents at the operand width.
 * The register number starts as c->modrm_reg; an alternative taken from the
 * low three opcode bits plus REX bit 0 is computed under a condition that is
 * not visible in this excerpt.
 */
755 static void decode_register_operand(struct operand *op,
756 struct decode_cache *c,
759 unsigned reg = c->modrm_reg;
/* The high-byte encodings (AH/CH/DH/BH) are only enabled when no REX prefix was seen. */
760 int highbyte_regs = c->rex_prefix == 0;
763 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
/* Byte operand, unless the caller inhibits byte-register decoding. */
765 if ((c->d & ByteOp) && !inhibit_bytereg) {
766 op->ptr = decode_register(reg, c->regs, highbyte_regs);
767 op->val = *(u8 *)op->ptr;
770 op->ptr = decode_register(reg, c->regs, 0);
771 op->bytes = c->op_bytes;
774 op->val = *(u16 *)op->ptr;
777 op->val = *(u32 *)op->ptr;
780 op->val = *(u64 *) op->ptr;
784 op->orig_val = op->val;
/*
 * Decode the ModRM byte at c->eip, plus any SIB byte and displacement.
 * Splits the byte into c->modrm_mod / c->modrm_reg / c->modrm_rm (with the
 * REX extension bits merged in). For mod == 3 the r/m field names a register:
 * c->modrm_ptr and c->modrm_val are set. Otherwise the effective address is
 * accumulated in c->modrm_ea using the 16-bit or the 32/64-bit rules,
 * depending on c->ad_bytes.
 * Several case labels and branches are not visible in this excerpt.
 */
787 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
788 struct x86_emulate_ops *ops)
790 struct decode_cache *c = &ctxt->decode;
792 int index_reg = 0, base_reg = 0, scale;
793 int rc = X86EMUL_CONTINUE;
/* REX bits become bit 3 of the corresponding register numbers. */
796 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */
797 index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
798 c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
/* mod = bits 7:6, reg = bits 5:3, rm = bits 2:0. */
801 c->modrm = insn_fetch(u8, 1, c->eip);
802 c->modrm_mod |= (c->modrm & 0xc0) >> 6;
803 c->modrm_reg |= (c->modrm & 0x38) >> 3;
804 c->modrm_rm |= (c->modrm & 0x07);
/* Register-direct form. */
808 if (c->modrm_mod == 3) {
809 c->modrm_ptr = decode_register(c->modrm_rm,
810 c->regs, c->d & ByteOp);
811 c->modrm_val = *(unsigned long *)c->modrm_ptr;
815 if (c->ad_bytes == 2) {
816 unsigned bx = c->regs[VCPU_REGS_RBX];
817 unsigned bp = c->regs[VCPU_REGS_RBP];
818 unsigned si = c->regs[VCPU_REGS_RSI];
819 unsigned di = c->regs[VCPU_REGS_RDI];
821 /* 16-bit ModR/M decode. */
822 switch (c->modrm_mod) {
824 if (c->modrm_rm == 6)
825 c->modrm_ea += insn_fetch(u16, 2, c->eip);
828 c->modrm_ea += insn_fetch(s8, 1, c->eip);
831 c->modrm_ea += insn_fetch(u16, 2, c->eip);
834 switch (c->modrm_rm) {
836 c->modrm_ea += bx + si;
839 c->modrm_ea += bx + di;
842 c->modrm_ea += bp + si;
845 c->modrm_ea += bp + di;
854 if (c->modrm_mod != 0)
/* These r/m forms default to SS unless an override was already given. */
861 if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
862 (c->modrm_rm == 6 && c->modrm_mod != 0))
863 if (!c->has_seg_override)
864 set_seg_override(c, VCPU_SREG_SS);
/* 16-bit effective addresses wrap at 64K. */
865 c->modrm_ea = (u16)c->modrm_ea;
867 /* 32/64-bit ModR/M decode. */
/* r/m low bits == 4: a SIB byte follows. */
868 if ((c->modrm_rm & 7) == 4) {
869 sib = insn_fetch(u8, 1, c->eip);
870 index_reg |= (sib >> 3) & 7;
/* Base 5 with mod 0 means a 32-bit displacement instead of a base register. */
874 if ((base_reg & 7) == 5 && c->modrm_mod == 0)
875 c->modrm_ea += insn_fetch(s32, 4, c->eip);
877 c->modrm_ea += c->regs[base_reg];
879 c->modrm_ea += c->regs[index_reg] << scale;
880 } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
881 if (ctxt->mode == X86EMUL_MODE_PROT64)
884 c->modrm_ea += c->regs[c->modrm_rm];
/* Displacement: sign-extended 8-bit or 32-bit, depending on mod. */
885 switch (c->modrm_mod) {
887 if (c->modrm_rm == 5)
888 c->modrm_ea += insn_fetch(s32, 4, c->eip);
891 c->modrm_ea += insn_fetch(s8, 1, c->eip);
894 c->modrm_ea += insn_fetch(s32, 4, c->eip);
/*
 * Decode an absolute memory operand: fetch an address of c->ad_bytes bytes
 * from the instruction stream and store it in c->modrm_ea (zero-extended,
 * since the unsigned u16/u32/u64 fetch types are used).
 * The case labels are not visible in this excerpt.
 */
902 static int decode_abs(struct x86_emulate_ctxt *ctxt,
903 struct x86_emulate_ops *ops)
905 struct decode_cache *c = &ctxt->decode;
906 int rc = X86EMUL_CONTINUE;
908 switch (c->ad_bytes) {
910 c->modrm_ea = insn_fetch(u16, 2, c->eip);
913 c->modrm_ea = insn_fetch(u32, 4, c->eip);
916 c->modrm_ea = insn_fetch(u64, 8, c->eip);
/*
 * Decode one guest instruction into ctxt->decode.
 * Steps, in order: reset the decode cache and snapshot the vcpu registers;
 * pick default operand/address sizes from the CPU mode; consume legacy and
 * REX prefixes; look up the opcode in opcode_table / twobyte_table and expand
 * opcode groups; decode ModRM or an absolute address; then decode the source,
 * second-source and destination operands.
 * Returns -1 when decoding ended with X86EMUL_UNHANDLEABLE, 0 otherwise.
 * Many case labels and branch lines are not visible in this excerpt.
 *
 * Fix: the Src2CL operand now takes the whole CL byte (& 0xff). The previous
 * mask of 0x8 kept only bit 3 of RCX, so CL-count instructions saw a count of
 * either 0 or 8.
 */
924 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
926 struct decode_cache *c = &ctxt->decode;
927 int rc = X86EMUL_CONTINUE;
928 int mode = ctxt->mode;
929 int def_op_bytes, def_ad_bytes, group;
932 /* we cannot decode insn before we complete previous rep insn */
933 WARN_ON(ctxt->restart);
935 /* Shadow copy of register state. Committed on successful emulation. */
936 memset(c, 0, sizeof(struct decode_cache));
/* Start with an empty instruction-fetch cache positioned at c->eip. */
938 c->fetch.start = c->fetch.end = c->eip;
939 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
940 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
/* Default operand and address sizes depend on the CPU mode. */
943 case X86EMUL_MODE_REAL:
944 case X86EMUL_MODE_VM86:
945 case X86EMUL_MODE_PROT16:
946 def_op_bytes = def_ad_bytes = 2;
948 case X86EMUL_MODE_PROT32:
949 def_op_bytes = def_ad_bytes = 4;
952 case X86EMUL_MODE_PROT64:
961 c->op_bytes = def_op_bytes;
962 c->ad_bytes = def_ad_bytes;
964 /* Legacy prefixes. */
966 switch (c->b = insn_fetch(u8, 1, c->eip)) {
967 case 0x66: /* operand-size override */
968 /* switch between 2/4 bytes */
969 c->op_bytes = def_op_bytes ^ 6;
971 case 0x67: /* address-size override */
972 if (mode == X86EMUL_MODE_PROT64)
973 /* switch between 4/8 bytes */
974 c->ad_bytes = def_ad_bytes ^ 12;
976 /* switch between 2/4 bytes */
977 c->ad_bytes = def_ad_bytes ^ 6;
979 case 0x26: /* ES override */
980 case 0x2e: /* CS override */
981 case 0x36: /* SS override */
982 case 0x3e: /* DS override */
/* Bits 4:3 of these prefix bytes give the segment register number. */
983 set_seg_override(c, (c->b >> 3) & 3);
985 case 0x64: /* FS override */
986 case 0x65: /* GS override */
987 set_seg_override(c, c->b & 7);
989 case 0x40 ... 0x4f: /* REX */
990 if (mode != X86EMUL_MODE_PROT64)
992 c->rex_prefix = c->b;
994 case 0xf0: /* LOCK */
997 case 0xf2: /* REPNE/REPNZ */
998 c->rep_prefix = REPNE_PREFIX;
1000 case 0xf3: /* REP/REPE/REPZ */
1001 c->rep_prefix = REPE_PREFIX;
1007 /* Any legacy prefix after a REX prefix nullifies its effect. */
1016 if (c->rex_prefix & 8)
1017 c->op_bytes = 8; /* REX.W */
1019 /* Opcode byte(s). */
1020 c->d = opcode_table[c->b];
1022 /* Two-byte opcode? */
1025 c->b = insn_fetch(u8, 1, c->eip);
1026 c->d = twobyte_table[c->b];
/* Group opcodes: ModRM bits 5:3 select one of eight entries of the group. */
1031 group = c->d & GroupMask;
1032 c->modrm = insn_fetch(u8, 1, c->eip);
1035 group = (group << 3) + ((c->modrm >> 3) & 7);
1036 if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
1037 c->d = group2_table[group];
1039 c->d = group_table[group];
1044 DPRINTF("Cannot emulate %02x\n", c->b);
1048 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
1051 /* ModRM and SIB bytes. */
1053 rc = decode_modrm(ctxt, ops);
1054 else if (c->d & MemAbs)
1055 rc = decode_abs(ctxt, ops);
1056 if (rc != X86EMUL_CONTINUE)
/* DS is the default segment when no override was decoded. */
1059 if (!c->has_seg_override)
1060 set_seg_override(c, VCPU_SREG_DS);
/* One-byte opcode 0x8d is excluded from the segment-base addition. */
1062 if (!(!c->twobyte && c->b == 0x8d))
1063 c->modrm_ea += seg_override_base(ctxt, c);
1065 if (c->ad_bytes != 8)
1066 c->modrm_ea = (u32)c->modrm_ea;
/* RIP-relative operands are relative to the current c->eip. */
1068 if (c->rip_relative)
1069 c->modrm_ea += c->eip;
1072 * Decode and fetch the source operand: register, memory
1075 switch (c->d & SrcMask) {
1079 decode_register_operand(&c->src, c, 0);
1088 c->src.bytes = (c->d & ByteOp) ? 1 :
1090 /* Don't fetch the address for invlpg: it could be unmapped. */
1091 if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
1095 * For instructions with a ModR/M byte, switch to register
1096 * access if Mod = 3.
1098 if ((c->d & ModRM) && c->modrm_mod == 3) {
1099 c->src.type = OP_REG;
1100 c->src.val = c->modrm_val;
1101 c->src.ptr = c->modrm_ptr;
1104 c->src.type = OP_MEM;
1105 c->src.ptr = (unsigned long *)c->modrm_ea;
1110 c->src.type = OP_IMM;
1111 c->src.ptr = (unsigned long *)c->eip;
1112 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1113 if (c->src.bytes == 8)
1115 /* NB. Immediates are sign-extended as necessary. */
1116 switch (c->src.bytes) {
1118 c->src.val = insn_fetch(s8, 1, c->eip);
1121 c->src.val = insn_fetch(s16, 2, c->eip);
1124 c->src.val = insn_fetch(s32, 4, c->eip);
/* SrcImmU: undo the sign extension by masking to the operand width. */
1127 if ((c->d & SrcMask) == SrcImmU) {
1128 switch (c->src.bytes) {
1133 c->src.val &= 0xffff;
1136 c->src.val &= 0xffffffff;
1143 c->src.type = OP_IMM;
1144 c->src.ptr = (unsigned long *)c->eip;
/* Byte immediates: sign-extended for SrcImmByte, zero-extended otherwise. */
1146 if ((c->d & SrcMask) == SrcImmByte)
1147 c->src.val = insn_fetch(s8, 1, c->eip);
1149 c->src.val = insn_fetch(u8, 1, c->eip);
1156 c->src.type = OP_MEM;
1157 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1158 c->src.ptr = (unsigned long *)
1159 register_address(c, seg_override_base(ctxt, c),
1160 c->regs[VCPU_REGS_RSI]);
1166 * Decode and fetch the second source operand: register, memory
1169 switch (c->d & Src2Mask) {
/* The count operand is CL, i.e. the low byte of RCX. */
1174 c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;
1177 c->src2.type = OP_IMM;
1178 c->src2.ptr = (unsigned long *)c->eip;
1180 c->src2.val = insn_fetch(u8, 1, c->eip);
1183 c->src2.type = OP_IMM;
1184 c->src2.ptr = (unsigned long *)c->eip;
1186 c->src2.val = insn_fetch(u16, 2, c->eip);
1193 c->src2.type = OP_MEM;
/* The second memory operand sits directly after the first one. */
1195 c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);
1200 /* Decode and fetch the destination operand: register or memory. */
1201 switch (c->d & DstMask) {
1203 /* Special instructions do their own operand decoding. */
1206 decode_register_operand(&c->dst, c,
1207 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1211 if ((c->d & ModRM) && c->modrm_mod == 3) {
1212 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1213 c->dst.type = OP_REG;
1214 c->dst.val = c->dst.orig_val = c->modrm_val;
1215 c->dst.ptr = c->modrm_ptr;
1218 c->dst.type = OP_MEM;
1219 c->dst.ptr = (unsigned long *)c->modrm_ea;
1220 if ((c->d & DstMask) == DstMem64)
1223 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
/* Bit ops: advance the pointer by the bit offset rounded down to a whole operand, in bytes. */
1226 unsigned long mask = ~(c->dst.bytes * 8 - 1);
1228 c->dst.ptr = (void *)c->dst.ptr +
1229 (c->src.val & mask) / 8;
1233 c->dst.type = OP_REG;
1234 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1235 c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1236 switch (c->dst.bytes) {
1238 c->dst.val = *(u8 *)c->dst.ptr;
1241 c->dst.val = *(u16 *)c->dst.ptr;
1244 c->dst.val = *(u32 *)c->dst.ptr;
1247 c->dst.val = *(u64 *)c->dst.ptr;
1250 c->dst.orig_val = c->dst.val;
1253 c->dst.type = OP_MEM;
1254 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1255 c->dst.ptr = (unsigned long *)
1256 register_address(c, es_base(ctxt),
1257 c->regs[VCPU_REGS_RDI]);
1263 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
/*
 * Supply size bytes of port input to dest from the read-ahead buffer in
 * ctxt->decode.io_read. When the buffer is drained (pos == end) it is
 * refilled with up to n elements through ops->pio_in_emulated().
 * The return statements are not visible in this excerpt.
 */
1266 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1267 struct x86_emulate_ops *ops,
1268 unsigned int size, unsigned short port,
1271 struct read_cache *rc = &ctxt->decode.io_read;
1273 if (rc->pos == rc->end) { /* refill pio read ahead */
1274 struct decode_cache *c = &ctxt->decode;
1275 unsigned int in_page, n;
/* With a rep prefix the count comes from RCX (masked to the address size); otherwise one element. */
1276 unsigned int count = c->rep_prefix ?
1277 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
/* Bytes between RDI and the page boundary in the direction selected by DF. */
1278 in_page = (ctxt->eflags & EFLG_DF) ?
1279 offset_in_page(c->regs[VCPU_REGS_RDI]) :
1280 PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
1281 n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
1285 rc->pos = rc->end = 0;
1286 if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
1291 memcpy(dest, rc->data + rc->pos, size);
/*
 * desc_limit_scaled - return the limit of 'desc' with granularity applied.
 *
 * When desc->g is set the raw limit is shifted left by 12 and the low 12
 * bits are filled with ones; otherwise the raw limit is returned unchanged.
 */
1296 static u32 desc_limit_scaled(struct desc_struct *desc)
1298 u32 limit = get_desc_limit(desc);
1300 return desc->g ? (limit << 12) | 0xfff : limit;
/*
 * get_descriptor_table_ptr - fill 'dt' with the descriptor table that
 * 'selector' refers to.
 *
 * If bit 2 of the selector is set, 'dt' is zeroed and then built from the
 * cached LDTR descriptor (size = scaled limit, address = base). Otherwise
 * the table is obtained from ops->get_gdt().
 */
1303 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1304 struct x86_emulate_ops *ops,
1305 u16 selector, struct desc_ptr *dt)
/* Note: '<<' binds tighter than '&', so this tests bit 2 of the selector. */
1307 if (selector & 1 << 2) {
1308 struct desc_struct desc;
1309 memset (dt, 0, sizeof *dt);
/* If no cached LDTR descriptor is available, dt presumably stays zeroed. */
1310 if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
1313 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1314 dt->address = get_desc_base(&desc);
1316 ops->get_gdt(dt, ctxt->vcpu);
/*
 * read_segment_descriptor - fetch the 8-byte descriptor named by 'selector'
 * from its descriptor table into 'desc'.
 *
 * The table index is selector >> 3. If the table is too small to hold the
 * whole entry, #GP is injected with error code (selector & 0xfffc) and
 * X86EMUL_PROPAGATE_FAULT is returned. A faulting read_std() results in a
 * page fault being injected for the descriptor address.
 */
1319 /* allowed just for 8 bytes segments */
1320 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1321 struct x86_emulate_ops *ops,
1322 u16 selector, struct desc_struct *desc)
1325 u16 index = selector >> 3;
1330 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
/* The last byte of the entry (index * 8 + 7) must lie within the table. */
1332 if (dt.size < index * 8 + 7) {
1333 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1334 return X86EMUL_PROPAGATE_FAULT;
1336 addr = dt.address + index * 8;
1337 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1338 if (ret == X86EMUL_PROPAGATE_FAULT)
1339 kvm_inject_page_fault(ctxt->vcpu, addr, err);
/*
 * write_segment_descriptor - store the 8-byte descriptor 'desc' into the
 * descriptor-table slot named by 'selector'.
 *
 * Performs the same bounds check as read_segment_descriptor(): an entry that
 * does not fit in the table raises #GP with error code (selector & 0xfffc)
 * and returns X86EMUL_PROPAGATE_FAULT. A faulting write_std() results in a
 * page fault being injected for the descriptor address.
 */
1344 /* allowed just for 8 bytes segments */
1345 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1346 struct x86_emulate_ops *ops,
1347 u16 selector, struct desc_struct *desc)
1350 u16 index = selector >> 3;
1355 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
/* The last byte of the entry (index * 8 + 7) must lie within the table. */
1357 if (dt.size < index * 8 + 7) {
1358 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1359 return X86EMUL_PROPAGATE_FAULT;
1362 addr = dt.address + index * 8;
1363 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1364 if (ret == X86EMUL_PROPAGATE_FAULT)
1365 kvm_inject_page_fault(ctxt->vcpu, addr, err);
/*
 * load_segment_descriptor - load segment register 'seg' with 'selector',
 * validating the descriptor it refers to.
 *
 * In real mode (and in VM86 mode for seg <= GS) a descriptor is synthesised
 * with base = selector << 4 and limit 0xffff. Otherwise the descriptor is
 * read from its table and checked according to the target register (SS, CS,
 * TR, LDTR or a data segment register), then written back to the table
 * before the selector and cached descriptor are installed.
 *
 * Returns X86EMUL_CONTINUE on success. On a failed check an exception with
 * vector err_vec and error code err_code is queued and
 * X86EMUL_PROPAGATE_FAULT is returned.
 */
1370 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1371 struct x86_emulate_ops *ops,
1372 u16 selector, int seg)
1374 struct desc_struct seg_desc;
1376 unsigned err_vec = GP_VECTOR;
1378 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1381 memset(&seg_desc, 0, sizeof seg_desc);
1383 if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
1384 || ctxt->mode == X86EMUL_MODE_REAL) {
1385 /* set real mode segment descriptor */
1386 set_desc_base(&seg_desc, selector << 4);
1387 set_desc_limit(&seg_desc, 0xffff);
1394 /* NULL selector is not valid for TR, CS and SS */
1395 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
1399 /* TR should be in GDT only */
1400 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1403 if (null_selector) /* for NULL selector skip all following checks */
1406 ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
1407 if (ret != X86EMUL_CONTINUE)
/* From here on, faults report the selector with its RPL bits cleared. */
1410 err_code = selector & 0xfffc;
1411 err_vec = GP_VECTOR;
1413 /* can't load system descriptor into segment selector */
1414 if (seg <= VCPU_SREG_GS && !seg_desc.s)
/* SS gets its own vector; the guarding condition is not visible here. */
1418 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1424 cpl = ops->cpl(ctxt->vcpu);
1429 * segment is not a writable data segment or segment
1430 * selector's RPL != CPL or segment selector's RPL != CPL
1432 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
/* Type bit 3 is tested here; presumably this is the CS case. */
1436 if (!(seg_desc.type & 8))
1439 if (seg_desc.type & 4) {
1445 if (rpl > cpl || dpl != cpl)
1448 /* CS(RPL) <- CPL */
1449 selector = (selector & 0xfffc) | cpl;
/* Must be a system descriptor of type 1 or 9 (presumably the TR case). */
1452 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1455 case VCPU_SREG_LDTR:
/* LDTR requires a system descriptor of type 2. */
1456 if (seg_desc.s || seg_desc.type != 2)
1459 default: /* DS, ES, FS, or GS */
1461 * segment is not a data or readable code segment or
1462 * ((segment is a data or nonconforming code segment)
1463 * and (both RPL and CPL > DPL))
1465 if ((seg_desc.type & 0xa) == 0x8 ||
1466 (((seg_desc.type & 0xc) != 0xc) &&
1467 (rpl > dpl && cpl > dpl)))
1473 /* mark segment as accessed */
1475 ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1476 if (ret != X86EMUL_CONTINUE)
/* Commit: install the selector and the cached descriptor for seg. */
1480 ops->set_segment_selector(selector, seg, ctxt->vcpu);
1481 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1482 return X86EMUL_CONTINUE;
/* Failure path: queue the selected exception with its error code. */
1484 kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
1485 return X86EMUL_PROPAGATE_FAULT;
/*
 * emulate_push - set up the destination operand for a stack push.
 *
 * The destination becomes a memory operand of op_bytes carrying c->src.val;
 * RSP is moved down by op_bytes and dst.ptr is set to the SS-relative
 * address of the new stack top. No memory is written here; the store is
 * presumably left to the later writeback of c->dst.
 */
1488 static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1490 struct decode_cache *c = &ctxt->decode;
1492 c->dst.type = OP_MEM;
1493 c->dst.bytes = c->op_bytes;
1494 c->dst.val = c->src.val;
1495 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1496 c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
1497 c->regs[VCPU_REGS_RSP]);
/*
 * emulate_pop - read 'len' bytes from the top of the stack into 'dest'.
 *
 * The read goes through ops->read_emulated() at the SS-relative address of
 * RSP. RSP is advanced by 'len' only after the read; when the read does not
 * return X86EMUL_CONTINUE the function presumably returns early with that
 * status (the statement is not visible here).
 */
1500 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1501 struct x86_emulate_ops *ops,
1502 void *dest, int len)
1504 struct decode_cache *c = &ctxt->decode;
1507 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1508 c->regs[VCPU_REGS_RSP]),
1509 dest, len, ctxt->vcpu);
1510 if (rc != X86EMUL_CONTINUE)
1513 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
/*
 * emulate_popf - pop a flags image of 'len' bytes and merge it into the
 * current flags, storing the result through 'dest'.
 *
 * Only the bits collected in change_mask are taken from the popped value;
 * all other bits keep their current ctxt->eflags value. The arithmetic and
 * control flags listed below are always changeable. IOPL and IF are added
 * to the mask depending on the CPU mode; the exact conditions (presumably
 * comparisons involving cpl and iopl) are not visible here. In VM86 mode
 * one path injects #GP(0) and returns X86EMUL_PROPAGATE_FAULT.
 */
1517 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1518 struct x86_emulate_ops *ops,
1519 void *dest, int len)
1522 unsigned long val, change_mask;
1523 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1524 int cpl = ops->cpl(ctxt->vcpu);
1526 rc = emulate_pop(ctxt, ops, &val, len);
1527 if (rc != X86EMUL_CONTINUE)
/* Flags that may be changed regardless of mode or privilege. */
1530 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1531 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1533 switch(ctxt->mode) {
1534 case X86EMUL_MODE_PROT64:
1535 case X86EMUL_MODE_PROT32:
1536 case X86EMUL_MODE_PROT16:
1538 change_mask |= EFLG_IOPL;
1540 change_mask |= EFLG_IF;
1542 case X86EMUL_MODE_VM86:
1544 kvm_inject_gp(ctxt->vcpu, 0);
1545 return X86EMUL_PROPAGATE_FAULT;
1547 change_mask |= EFLG_IF;
1549 default: /* real mode */
1550 change_mask |= (EFLG_IOPL | EFLG_IF);
/* Keep protected bits from the current flags, take the rest from the pop. */
1554 *(unsigned long *)dest =
1555 (ctxt->eflags & ~change_mask) | (val & change_mask);
/*
 * emulate_push_sreg - prepare a push of segment register 'seg'.
 *
 * The segment is fetched through kvm_x86_ops->get_segment() and its selector
 * is placed in c->src.val; the push itself presumably follows on a line not
 * visible here.
 */
1560 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1562 struct decode_cache *c = &ctxt->decode;
1563 struct kvm_segment segment;
1565 kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
1567 c->src.val = segment.selector;
/*
 * emulate_pop_sreg - pop a selector from the stack and load it into segment
 * register 'seg'.
 *
 * op_bytes are popped, but only the low 16 bits are passed on to
 * load_segment_descriptor(), which performs the descriptor checks.
 */
1571 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1572 struct x86_emulate_ops *ops, int seg)
1574 struct decode_cache *c = &ctxt->decode;
1575 unsigned long selector;
1578 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1579 if (rc != X86EMUL_CONTINUE)
1582 rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
/*
 * emulate_pusha - stage the general registers RAX..RDI as push sources.
 *
 * The registers are visited in ascending index order. For the RSP slot the
 * value captured before the loop (old_esp) is used instead of the live
 * register. The per-register push and the loop increment are not visible
 * here.
 */
1586 static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
1588 struct decode_cache *c = &ctxt->decode;
1589 unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1590 int reg = VCPU_REGS_RAX;
1592 while (reg <= VCPU_REGS_RDI) {
1593 (reg == VCPU_REGS_RSP) ?
1594 (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
/*
 * emulate_popa - pop the general registers in descending index order, from
 * RDI down to RAX.
 *
 * The RSP slot is handled by adjusting the stack pointer with
 * register_address_increment() rather than by calling emulate_pop(); every
 * other register is popped with op_bytes. A failing pop presumably ends the
 * loop (the statement is not visible here).
 */
1601 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1602 struct x86_emulate_ops *ops)
1604 struct decode_cache *c = &ctxt->decode;
1605 int rc = X86EMUL_CONTINUE;
1606 int reg = VCPU_REGS_RDI;
1608 while (reg >= VCPU_REGS_RAX) {
1609 if (reg == VCPU_REGS_RSP) {
1610 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1615 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1616 if (rc != X86EMUL_CONTINUE)
/*
 * emulate_grp1a - pop c->dst.bytes from the stack into c->dst.val.
 *
 * Returns the status of emulate_pop().
 */
1623 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1624 struct x86_emulate_ops *ops)
1626 struct decode_cache *c = &ctxt->decode;
1628 return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
/*
 * emulate_grp2 - rotate/shift group, selected by the ModRM reg field.
 *
 * Each case applies one of rol, ror, rcl, rcr, sal, shr or sar to c->dst
 * with c->src as the count operand, updating ctxt->eflags. Both reg values
 * 4 and 6 map to "sal". Most of the case labels are not visible here.
 */
1631 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1633 struct decode_cache *c = &ctxt->decode;
1634 switch (c->modrm_reg) {
1636 emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1639 emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1642 emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1645 emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1647 case 4: /* sal/shl */
1648 case 6: /* sal/shl */
1649 emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1652 emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1655 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
/*
 * emulate_grp3 - unary group, selected by the ModRM reg field.
 *
 * Visible operations: reg 0 and 1 perform "test" of c->src against c->dst;
 * one case bitwise-complements c->dst.val; one case applies "neg" to c->dst.
 * The remaining case labels and the return value are not visible here.
 */
1660 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1661 struct x86_emulate_ops *ops)
1663 struct decode_cache *c = &ctxt->decode;
1665 switch (c->modrm_reg) {
1666 case 0 ... 1: /* test */
1667 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
/* Bitwise complement; ctxt->eflags is not touched on this line. */
1670 c->dst.val = ~c->dst.val;
1673 emulate_1op("neg", c->dst, ctxt->eflags);
/*
 * emulate_grp45 - inc/dec/call/jmp group, selected by the ModRM reg field.
 *
 * Visible operations: "inc" and "dec" on c->dst; reg 2 (near absolute call)
 * redirects c->eip to c->src.val and places the previous eip in c->src.val,
 * presumably so that it can be pushed; reg 4 (absolute jump) sets c->eip to
 * c->src.val. Returns X86EMUL_CONTINUE.
 */
1681 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1682 struct x86_emulate_ops *ops)
1684 struct decode_cache *c = &ctxt->decode;
1686 switch (c->modrm_reg) {
1688 emulate_1op("inc", c->dst, ctxt->eflags);
1691 emulate_1op("dec", c->dst, ctxt->eflags);
1693 case 2: /* call near abs */ {
1696 c->eip = c->src.val;
1697 c->src.val = old_eip;
1701 case 4: /* jmp abs */
1702 c->eip = c->src.val;
1708 return X86EMUL_CONTINUE;
/*
 * emulate_grp9 - 64-bit compare-and-exchange against EDX:EAX (this matches
 * the shape of CMPXCHG8B; confirm against the opcode table).
 *
 * The original destination value is compared, as two 32-bit halves, with
 * RAX (low) and RDX (high). On mismatch the old value is loaded into
 * RAX/RDX and ZF is cleared. On match the destination value becomes
 * RCX:RBX (high:low) and ZF is set. Returns X86EMUL_CONTINUE.
 */
1711 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1712 struct x86_emulate_ops *ops)
1714 struct decode_cache *c = &ctxt->decode;
1715 u64 old = c->dst.orig_val;
1717 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1718 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
/* Mismatch: report the value found in memory through RAX/RDX. */
1720 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1721 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1722 ctxt->eflags &= ~EFLG_ZF;
/* Match: the new destination value is assembled from RCX (high) and RBX (low). */
1724 c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1725 (u32) c->regs[VCPU_REGS_RBX];
1727 ctxt->eflags |= EFLG_ZF;
1729 return X86EMUL_CONTINUE;
/*
 * emulate_ret_far - far return: pop the instruction pointer, then CS.
 *
 * Each pop uses op_bytes. With a 4-byte operand size the popped eip is
 * truncated to 32 bits. The low 16 bits of the popped CS value are then
 * loaded through load_segment_descriptor().
 */
1732 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1733 struct x86_emulate_ops *ops)
1735 struct decode_cache *c = &ctxt->decode;
1739 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1740 if (rc != X86EMUL_CONTINUE)
1742 if (c->op_bytes == 4)
1743 c->eip = (u32)c->eip;
1744 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1745 if (rc != X86EMUL_CONTINUE)
1747 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
/*
 * writeback - commit the destination operand c->dst after execution.
 *
 * Dispatches on c->dst.type. A register destination is stored with a width
 * chosen by c->dst.bytes; the 4-byte store goes through the full-width
 * pointer so the upper half is zeroed. A memory destination is written with
 * either ops->cmpxchg_emulated() or ops->write_emulated(); the condition
 * choosing between the two is not visible here. Returns X86EMUL_CONTINUE on
 * success.
 */
1751 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1752 struct x86_emulate_ops *ops)
1755 struct decode_cache *c = &ctxt->decode;
1757 switch (c->dst.type) {
1759 /* The 4-byte case *is* correct:
1760 * in 64-bit mode we zero-extend.
1762 switch (c->dst.bytes) {
1764 *(u8 *)c->dst.ptr = (u8)c->dst.val;
1767 *(u16 *)c->dst.ptr = (u16)c->dst.val;
1770 *c->dst.ptr = (u32)c->dst.val;
1771 break; /* 64b: zero-ext */
1773 *c->dst.ptr = c->dst.val;
1779 rc = ops->cmpxchg_emulated(
1780 (unsigned long)c->dst.ptr,
1786 rc = ops->write_emulated(
1787 (unsigned long)c->dst.ptr,
1791 if (rc != X86EMUL_CONTINUE)
1800 return X86EMUL_CONTINUE;
/*
 * toggle_interruptibility - request the interrupt shadow 'mask' for the
 * current instruction unless that shadow is already active.
 *
 * The current shadow state is queried through
 * kvm_x86_ops->get_interrupt_shadow(); ctxt->interruptibility is set to
 * 'mask' only when none of the mask bits are already reported.
 */
1803 static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1805 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1807 * an sti; sti; sequence only disable interrupts for the first
1808 * instruction. So, if the last instruction, be it emulated or
1809 * not, left the system with the INT_STI flag enabled, it
1810 * means that the last instruction is an sti. We should not
1811 * leave the flag on in this case. The same goes for mov ss
1813 if (!(int_shadow & mask))
1814 ctxt->interruptibility = mask;
/*
 * setup_syscalls_segments - build the flat CS and SS templates shared by the
 * syscall/sysenter/sysexit emulation.
 *
 * 'cs' is zeroed and then filled from the current CS segment; 'ss' is
 * zeroed. Both then receive base 0, limit 0xffffffff and g = 1. CS gets
 * type 0x0b, SS gets type 0x03 and db = 1. The l and dpl fields of CS are
 * set to 0 here and adjusted by the callers.
 */
1818 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1819 struct kvm_segment *cs, struct kvm_segment *ss)
1821 memset(cs, 0, sizeof(struct kvm_segment));
1822 kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
1823 memset(ss, 0, sizeof(struct kvm_segment));
1825 cs->l = 0; /* will be adjusted later */
1826 cs->base = 0; /* flat segment */
1827 cs->g = 1; /* 4kb granularity */
1828 cs->limit = 0xffffffff; /* 4GB limit */
1829 cs->type = 0x0b; /* Read, Execute, Accessed */
1831 cs->dpl = 0; /* will be adjusted later */
1836 ss->base = 0; /* flat segment */
1837 ss->limit = 0xffffffff; /* 4GB limit */
1838 ss->g = 1; /* 4kb granularity */
1840 ss->type = 0x03; /* Read/Write, Accessed */
1841 ss->db = 1; /* 32bit stack segment */
/*
 * emulate_syscall - emulate the SYSCALL instruction.
 *
 * Queues #UD in real and VM86 mode. Otherwise CS/SS are built from the flat
 * templates with selectors derived from MSR_STAR, and RCX receives the
 * return address (c->eip). In long mode R11 receives the flags with RF
 * cleared, the target is read from MSR_LSTAR (64-bit mode) or MSR_CSTAR,
 * and the flags are masked with MSR_SYSCALL_MASK. Outside long mode the
 * target is the low 32 bits of MSR_STAR, and VM, IF and RF are cleared.
 * Returns X86EMUL_CONTINUE on success.
 */
1847 emulate_syscall(struct x86_emulate_ctxt *ctxt)
1849 struct decode_cache *c = &ctxt->decode;
1850 struct kvm_segment cs, ss;
1853 /* syscall is not available in real mode */
1854 if (ctxt->mode == X86EMUL_MODE_REAL ||
1855 ctxt->mode == X86EMUL_MODE_VM86) {
1856 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1857 return X86EMUL_PROPAGATE_FAULT;
1860 setup_syscalls_segments(ctxt, &cs, &ss);
1862 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
/* NOTE(review): any shift of msr_data before these two lines is not visible here. */
1864 cs.selector = (u16)(msr_data & 0xfffc);
1865 ss.selector = (u16)(msr_data + 8);
1867 if (is_long_mode(ctxt->vcpu)) {
1871 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1872 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
/* RCX holds the return address for the matching return instruction. */
1874 c->regs[VCPU_REGS_RCX] = c->eip;
1875 if (is_long_mode(ctxt->vcpu)) {
1876 #ifdef CONFIG_X86_64
1877 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1879 kvm_x86_ops->get_msr(ctxt->vcpu,
1880 ctxt->mode == X86EMUL_MODE_PROT64 ?
1881 MSR_LSTAR : MSR_CSTAR, &msr_data);
1884 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1885 ctxt->eflags &= ~(msr_data | EFLG_RF);
1889 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1890 c->eip = (u32)msr_data;
1892 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1895 return X86EMUL_CONTINUE;
/*
 * emulate_sysenter - emulate the SYSENTER instruction.
 *
 * Injects #GP(0) in real mode and queues #UD in 64-bit mode. Otherwise the
 * flat CS/SS templates are set up, MSR_IA32_SYSENTER_CS is read and
 * validated (#GP(0) if its selector part is null), VM/IF/RF are cleared in
 * the flags, CS is loaded from the MSR with RPL cleared and SS = CS + 8,
 * and RSP is loaded from MSR_IA32_SYSENTER_ESP. MSR_IA32_SYSENTER_EIP is
 * also read, presumably to set the new instruction pointer. Returns
 * X86EMUL_CONTINUE on success.
 *
 * NOTE(review): because 64-bit mode is rejected up front, the
 * X86EMUL_MODE_PROT64 case in the switch below appears to be unreachable.
 */
1899 emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1901 struct decode_cache *c = &ctxt->decode;
1902 struct kvm_segment cs, ss;
1905 /* inject #GP if in real mode */
1906 if (ctxt->mode == X86EMUL_MODE_REAL) {
1907 kvm_inject_gp(ctxt->vcpu, 0);
1908 return X86EMUL_PROPAGATE_FAULT;
1911 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1912 * Therefore, we inject an #UD.
1914 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1915 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1916 return X86EMUL_PROPAGATE_FAULT;
1919 setup_syscalls_segments(ctxt, &cs, &ss);
1921 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1922 switch (ctxt->mode) {
1923 case X86EMUL_MODE_PROT32:
1924 if ((msr_data & 0xfffc) == 0x0) {
1925 kvm_inject_gp(ctxt->vcpu, 0);
1926 return X86EMUL_PROPAGATE_FAULT;
1929 case X86EMUL_MODE_PROT64:
1930 if (msr_data == 0x0) {
1931 kvm_inject_gp(ctxt->vcpu, 0);
1932 return X86EMUL_PROPAGATE_FAULT;
1937 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1938 cs.selector = (u16)msr_data;
1939 cs.selector &= ~SELECTOR_RPL_MASK;
1940 ss.selector = cs.selector + 8;
1941 ss.selector &= ~SELECTOR_RPL_MASK;
1942 if (ctxt->mode == X86EMUL_MODE_PROT64
1943 || is_long_mode(ctxt->vcpu)) {
1948 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1949 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1951 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1954 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1955 c->regs[VCPU_REGS_RSP] = msr_data;
1957 return X86EMUL_CONTINUE;
/*
 * emulate_sysexit - emulate the SYSEXIT instruction.
 *
 * Injects #GP(0) in real and VM86 mode. The target mode is 64-bit when
 * REX.W (bit 3 of rex_prefix) is set, otherwise 32-bit. CS/SS selectors are
 * derived from MSR_IA32_SYSENTER_CS: +16/+24 for a 32-bit return, +32 and
 * CS + 8 for a 64-bit return, with #GP(0) if the MSR holds a null selector.
 * Both selectors get RPL 3, eip is loaded from RDX and RSP from RCX.
 * Returns X86EMUL_CONTINUE on success.
 */
1961 emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1963 struct decode_cache *c = &ctxt->decode;
1964 struct kvm_segment cs, ss;
1968 /* inject #GP if in real mode or Virtual 8086 mode */
1969 if (ctxt->mode == X86EMUL_MODE_REAL ||
1970 ctxt->mode == X86EMUL_MODE_VM86) {
1971 kvm_inject_gp(ctxt->vcpu, 0);
1972 return X86EMUL_PROPAGATE_FAULT;
1975 setup_syscalls_segments(ctxt, &cs, &ss);
/* REX.W selects a return to 64-bit user mode. */
1977 if ((c->rex_prefix & 0x8) != 0x0)
1978 usermode = X86EMUL_MODE_PROT64;
1980 usermode = X86EMUL_MODE_PROT32;
1984 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1986 case X86EMUL_MODE_PROT32:
1987 cs.selector = (u16)(msr_data + 16);
1988 if ((msr_data & 0xfffc) == 0x0) {
1989 kvm_inject_gp(ctxt->vcpu, 0);
1990 return X86EMUL_PROPAGATE_FAULT;
1992 ss.selector = (u16)(msr_data + 24);
1994 case X86EMUL_MODE_PROT64:
1995 cs.selector = (u16)(msr_data + 32);
1996 if (msr_data == 0x0) {
1997 kvm_inject_gp(ctxt->vcpu, 0);
1998 return X86EMUL_PROPAGATE_FAULT;
2000 ss.selector = cs.selector + 8;
/* Returning to user mode: force RPL 3 on both selectors. */
2005 cs.selector |= SELECTOR_RPL_MASK;
2006 ss.selector |= SELECTOR_RPL_MASK;
2008 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
2009 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
/* NOTE(review): these read vcpu->arch.regs rather than the shadow c->regs - confirm this is intended. */
2011 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
2012 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
2014 return X86EMUL_CONTINUE;
/*
 * emulator_bad_iopl - tell whether the current privilege level is
 * insufficient for I/O according to EFLAGS.IOPL.
 *
 * Real mode and VM86 mode are special-cased; their return values are not
 * visible here. In the remaining modes the result is true when the current
 * privilege level is numerically greater than IOPL.
 */
2017 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
2018 struct x86_emulate_ops *ops)
2021 if (ctxt->mode == X86EMUL_MODE_REAL)
2023 if (ctxt->mode == X86EMUL_MODE_VM86)
2025 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
2026 return ops->cpl(ctxt->vcpu) > iopl;
/*
 * emulator_io_port_access_allowed - consult the I/O permission bitmap of the
 * current TSS for an access of 'len' bytes at 'port'.
 *
 * The TR segment must be usable and have a limit of at least 103. The
 * 16-bit bitmap offset is read from TSS offset 102, the bitmap byte covering
 * the port is bounds-checked against the TSS limit and read, and the 'len'
 * bits starting at bit (port & 7) are tested. The return statements are not
 * visible here.
 */
2029 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2030 struct x86_emulate_ops *ops,
2033 struct kvm_segment tr_seg;
2036 u8 perm, bit_idx = port & 0x7;
/* One mask bit per accessed port. */
2037 unsigned mask = (1 << len) - 1;
2039 kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
2040 if (tr_seg.unusable)
2042 if (tr_seg.limit < 103)
2044 r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
2046 if (r != X86EMUL_CONTINUE)
2048 if (io_bitmap_ptr + port/8 > tr_seg.limit)
2050 r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
2052 if (r != X86EMUL_CONTINUE)
/* NOTE(review): perm is a single byte; with bit_idx + len > 8 the tested bits extend past it - confirm. */
2054 if ((perm >> bit_idx) & mask)
/*
 * emulator_io_permited - decide whether an I/O access of 'len' bytes at
 * 'port' may proceed.
 *
 * The I/O permission bitmap is consulted only when the IOPL check fails.
 * The return statements are not visible here; presumably the access is
 * refused only when both checks fail.
 */
2059 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2060 struct x86_emulate_ops *ops,
2063 if (emulator_bad_iopl(ctxt, ops))
2064 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
/*
 * get_cached_descriptor_base - return the base address of the cached
 * descriptor for segment 'seg'.
 *
 * The value returned when ops->get_cached_descriptor() fails is not visible
 * here.
 */
2069 static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
2070 struct x86_emulate_ops *ops,
2073 struct desc_struct desc;
2074 if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
2075 return get_desc_base(&desc);
/*
 * save_state_to_tss16 - snapshot the current task state into a 16-bit TSS
 * image.
 *
 * Copies the flags, the eight general registers from the shadow register
 * file, and the ES, CS, SS, DS and LDTR selectors into 'tss'. The assignments
 * narrow to the field widths declared in struct tss_segment_16.
 */
2080 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2081 struct x86_emulate_ops *ops,
2082 struct tss_segment_16 *tss)
2084 struct decode_cache *c = &ctxt->decode;
2087 tss->flag = ctxt->eflags;
2088 tss->ax = c->regs[VCPU_REGS_RAX];
2089 tss->cx = c->regs[VCPU_REGS_RCX];
2090 tss->dx = c->regs[VCPU_REGS_RDX];
2091 tss->bx = c->regs[VCPU_REGS_RBX];
2092 tss->sp = c->regs[VCPU_REGS_RSP];
2093 tss->bp = c->regs[VCPU_REGS_RBP];
2094 tss->si = c->regs[VCPU_REGS_RSI];
2095 tss->di = c->regs[VCPU_REGS_RDI];
2097 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2098 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2099 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2100 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2101 tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
/*
 * load_state_from_tss16 - install the task state held in a 16-bit TSS image.
 *
 * The flags (with bit 1 forced on) and the general registers are loaded
 * first. All segment selectors are then written before any descriptor is
 * loaded, so that a fault during descriptor loading is taken in the context
 * of the new task. Descriptors are loaded in the order LDTR, ES, CS, SS, DS;
 * a failing load presumably returns its status immediately. Returns
 * X86EMUL_CONTINUE on success.
 */
2104 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2105 struct x86_emulate_ops *ops,
2106 struct tss_segment_16 *tss)
2108 struct decode_cache *c = &ctxt->decode;
2112 ctxt->eflags = tss->flag | 2;
2113 c->regs[VCPU_REGS_RAX] = tss->ax;
2114 c->regs[VCPU_REGS_RCX] = tss->cx;
2115 c->regs[VCPU_REGS_RDX] = tss->dx;
2116 c->regs[VCPU_REGS_RBX] = tss->bx;
2117 c->regs[VCPU_REGS_RSP] = tss->sp;
2118 c->regs[VCPU_REGS_RBP] = tss->bp;
2119 c->regs[VCPU_REGS_RSI] = tss->si;
2120 c->regs[VCPU_REGS_RDI] = tss->di;
2123 * SDM says that segment selectors are loaded before segment
2126 ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
2127 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2128 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2129 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2130 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2133 * Now load segment descriptors. If fault happenes at this stage
2134 * it is handled in a context of new task
2136 ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
2137 if (ret != X86EMUL_CONTINUE)
2139 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2140 if (ret != X86EMUL_CONTINUE)
2142 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2143 if (ret != X86EMUL_CONTINUE)
2145 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2146 if (ret != X86EMUL_CONTINUE)
2148 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2149 if (ret != X86EMUL_CONTINUE)
2152 return X86EMUL_CONTINUE;
/*
 * task_switch_16 - perform the memory side of a task switch between 16-bit
 * TSS images.
 *
 * Steps: read the old TSS at old_tss_base, overwrite it with the current
 * state and write it back; read the new TSS from the base of 'new_desc';
 * if old_tss_sel is not the 0xffff sentinel, store it as the new task's
 * prev_task_link (written at new_tss_base); finally load the new state.
 * A faulting access injects a page fault at the base address of the TSS
 * involved.
 */
2155 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2156 struct x86_emulate_ops *ops,
2157 u16 tss_selector, u16 old_tss_sel,
2158 ulong old_tss_base, struct desc_struct *new_desc)
2160 struct tss_segment_16 tss_seg;
2162 u32 err, new_tss_base = get_desc_base(new_desc);
2164 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2166 if (ret == X86EMUL_PROPAGATE_FAULT) {
2167 /* FIXME: need to provide precise fault address */
2168 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2172 save_state_to_tss16(ctxt, ops, &tss_seg);
2174 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2176 if (ret == X86EMUL_PROPAGATE_FAULT) {
2177 /* FIXME: need to provide precise fault address */
2178 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2182 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2184 if (ret == X86EMUL_PROPAGATE_FAULT) {
2185 /* FIXME: need to provide precise fault address */
2186 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
/* 0xffff is the caller's marker for "do not set a back link". */
2190 if (old_tss_sel != 0xffff) {
2191 tss_seg.prev_task_link = old_tss_sel;
2193 ret = ops->write_std(new_tss_base,
2194 &tss_seg.prev_task_link,
2195 sizeof tss_seg.prev_task_link,
2197 if (ret == X86EMUL_PROPAGATE_FAULT) {
2198 /* FIXME: need to provide precise fault address */
2199 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2204 return load_state_from_tss16(ctxt, ops, &tss_seg);
/*
 * save_state_to_tss32 - snapshot the current task state into a 32-bit TSS
 * image.
 *
 * Copies CR3, the flags, the eight general registers from the shadow
 * register file, and the ES, CS, SS, DS, FS, GS and LDTR selectors into
 * 'tss'.
 */
2207 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2208 struct x86_emulate_ops *ops,
2209 struct tss_segment_32 *tss)
2211 struct decode_cache *c = &ctxt->decode;
2213 tss->cr3 = ops->get_cr(3, ctxt->vcpu);
2215 tss->eflags = ctxt->eflags;
2216 tss->eax = c->regs[VCPU_REGS_RAX];
2217 tss->ecx = c->regs[VCPU_REGS_RCX];
2218 tss->edx = c->regs[VCPU_REGS_RDX];
2219 tss->ebx = c->regs[VCPU_REGS_RBX];
2220 tss->esp = c->regs[VCPU_REGS_RSP];
2221 tss->ebp = c->regs[VCPU_REGS_RBP];
2222 tss->esi = c->regs[VCPU_REGS_RSI];
2223 tss->edi = c->regs[VCPU_REGS_RDI];
2225 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2226 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2227 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2228 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2229 tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2230 tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2231 tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
/*
 * load_state_from_tss32 - install the task state held in a 32-bit TSS image.
 *
 * CR3 is set first, followed by the flags (with bit 1 forced on) and the
 * general registers. All segment selectors are then written before any
 * descriptor is loaded, so that a fault during descriptor loading is taken
 * in the context of the new task. Descriptors are loaded in the order LDTR,
 * ES, CS, SS, DS, FS, GS; a failing load presumably returns its status
 * immediately. Returns X86EMUL_CONTINUE on success.
 */
2234 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2235 struct x86_emulate_ops *ops,
2236 struct tss_segment_32 *tss)
2238 struct decode_cache *c = &ctxt->decode;
2241 ops->set_cr(3, tss->cr3, ctxt->vcpu);
2243 ctxt->eflags = tss->eflags | 2;
2244 c->regs[VCPU_REGS_RAX] = tss->eax;
2245 c->regs[VCPU_REGS_RCX] = tss->ecx;
2246 c->regs[VCPU_REGS_RDX] = tss->edx;
2247 c->regs[VCPU_REGS_RBX] = tss->ebx;
2248 c->regs[VCPU_REGS_RSP] = tss->esp;
2249 c->regs[VCPU_REGS_RBP] = tss->ebp;
2250 c->regs[VCPU_REGS_RSI] = tss->esi;
2251 c->regs[VCPU_REGS_RDI] = tss->edi;
2254 * SDM says that segment selectors are loaded before segment
2257 ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2258 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2259 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2260 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2261 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2262 ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2263 ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2266 * Now load segment descriptors. If fault happenes at this stage
2267 * it is handled in a context of new task
2269 ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2270 if (ret != X86EMUL_CONTINUE)
2272 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2273 if (ret != X86EMUL_CONTINUE)
2275 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2276 if (ret != X86EMUL_CONTINUE)
2278 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2279 if (ret != X86EMUL_CONTINUE)
2281 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2282 if (ret != X86EMUL_CONTINUE)
2284 ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2285 if (ret != X86EMUL_CONTINUE)
2287 ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2288 if (ret != X86EMUL_CONTINUE)
2291 return X86EMUL_CONTINUE;
/*
 * task_switch_32 - perform the memory side of a task switch between 32-bit
 * TSS images.
 *
 * Steps: read the old TSS at old_tss_base, overwrite it with the current
 * state and write it back; read the new TSS from the base of 'new_desc';
 * if old_tss_sel is not the 0xffff sentinel, store it as the new task's
 * prev_task_link (written at new_tss_base); finally load the new state.
 * A faulting access injects a page fault at the base address of the TSS
 * involved.
 */
2294 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2295 struct x86_emulate_ops *ops,
2296 u16 tss_selector, u16 old_tss_sel,
2297 ulong old_tss_base, struct desc_struct *new_desc)
2299 struct tss_segment_32 tss_seg;
2301 u32 err, new_tss_base = get_desc_base(new_desc);
2303 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2305 if (ret == X86EMUL_PROPAGATE_FAULT) {
2306 /* FIXME: need to provide precise fault address */
2307 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2311 save_state_to_tss32(ctxt, ops, &tss_seg);
2313 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2315 if (ret == X86EMUL_PROPAGATE_FAULT) {
2316 /* FIXME: need to provide precise fault address */
2317 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2321 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2323 if (ret == X86EMUL_PROPAGATE_FAULT) {
2324 /* FIXME: need to provide precise fault address */
2325 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
/* 0xffff is the caller's marker for "do not set a back link". */
2329 if (old_tss_sel != 0xffff) {
2330 tss_seg.prev_task_link = old_tss_sel;
2332 ret = ops->write_std(new_tss_base,
2333 &tss_seg.prev_task_link,
2334 sizeof tss_seg.prev_task_link,
2336 if (ret == X86EMUL_PROPAGATE_FAULT) {
2337 /* FIXME: need to provide precise fault address */
2338 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2343 return load_state_from_tss32(ctxt, ops, &tss_seg);
/*
 * emulator_do_task_switch - core of the hardware task switch emulation.
 *
 * Reads the descriptors of the new TSS (tss_selector) and of the current
 * one (TR). Unless the switch is caused by IRET, the new descriptor's DPL
 * is checked against the selector's RPL and the CPL, with #GP(0) on
 * failure. A non-present or too-small new TSS raises #TS with the selector
 * as error code. The busy flag of the old descriptor is cleared for
 * IRET/JMP, NT is cleared for IRET, and the back link is suppressed (by
 * passing 0xffff) unless the reason is CALL or GATE. The switch is then
 * delegated to task_switch_32() or task_switch_16() according to bit 3 of
 * the new descriptor's type. Afterwards NT is set for CALL/GATE, the new
 * descriptor is marked busy unless the reason is IRET, CR0.TS is set and TR
 * is updated.
 *
 * NOTE(review): the results of both write_segment_descriptor() calls appear
 * to be ignored - confirm against the omitted argument lines.
 */
2346 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2347 struct x86_emulate_ops *ops,
2348 u16 tss_selector, int reason)
2350 struct desc_struct curr_tss_desc, next_tss_desc;
2352 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2353 ulong old_tss_base =
2354 get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
2357 /* FIXME: old_tss_base == ~0 ? */
2359 ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2360 if (ret != X86EMUL_CONTINUE)
2362 ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2363 if (ret != X86EMUL_CONTINUE)
2366 /* FIXME: check that next_tss_desc is tss */
/* Privilege check is skipped for IRET-initiated switches. */
2368 if (reason != TASK_SWITCH_IRET) {
2369 if ((tss_selector & 3) > next_tss_desc.dpl ||
2370 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2371 kvm_inject_gp(ctxt->vcpu, 0);
2372 return X86EMUL_PROPAGATE_FAULT;
/* Minimum limit: 0x67 when type bit 3 is set (32-bit TSS), 0x2b otherwise. */
2376 desc_limit = desc_limit_scaled(&next_tss_desc);
2377 if (!next_tss_desc.p ||
2378 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2379 desc_limit < 0x2b)) {
2380 kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
2381 tss_selector & 0xfffc);
2382 return X86EMUL_PROPAGATE_FAULT;
2385 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2386 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2387 write_segment_descriptor(ctxt, ops, old_tss_sel,
2391 if (reason == TASK_SWITCH_IRET)
2392 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2394 /* set back link to prev task only if NT bit is set in eflags
2395 note that old_tss_sel is not used after this point */
2396 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2397 old_tss_sel = 0xffff;
/* Type bit 3 selects the 32-bit TSS layout. */
2399 if (next_tss_desc.type & 8)
2400 ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2401 old_tss_base, &next_tss_desc);
2403 ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2404 old_tss_base, &next_tss_desc);
2405 if (ret != X86EMUL_CONTINUE)
2408 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2409 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2411 if (reason != TASK_SWITCH_IRET) {
2412 next_tss_desc.type |= (1 << 1); /* set busy flag */
2413 write_segment_descriptor(ctxt, ops, tss_selector,
/* Set CR0.TS and make the new TSS the current task register contents. */
2417 ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2418 ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2419 ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
/*
 * emulator_task_switch - public entry point for emulating a task switch.
 *
 * The decode cache is cleared and seeded with the vcpu's registers, then
 * emulator_do_task_switch() does the work on that shadow copy. Only when it
 * returns X86EMUL_CONTINUE are the shadow registers and the new instruction
 * pointer committed back to the vcpu.
 */
2424 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2425 struct x86_emulate_ops *ops,
2426 u16 tss_selector, int reason)
2428 struct decode_cache *c = &ctxt->decode;
2431 memset(c, 0, sizeof(struct decode_cache));
2433 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2435 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason);
2437 if (rc == X86EMUL_CONTINUE) {
2438 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2439 kvm_rip_write(ctxt->vcpu, c->eip);
/*
 * string_addr_inc - step a string-instruction index register and refresh
 * the operand pointer.
 *
 * Register 'reg' is moved by op->bytes, backwards when EFLAGS.DF is set and
 * forwards otherwise; op->ptr is then recomputed from 'base' and the updated
 * register value.
 */
2445 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2446 int reg, struct operand *op)
2448 struct decode_cache *c = &ctxt->decode;
2449 int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2451 register_address_increment(c, &c->regs[reg], df * op->bytes);
2452 op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
2456 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
2459 struct decode_cache *c = &ctxt->decode;
2460 int rc = X86EMUL_CONTINUE;
2461 int saved_dst_type = c->dst.type;
2463 ctxt->interruptibility = 0;
2465 /* Shadow copy of register state. Committed on successful emulation.
2466 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
2470 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2472 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2473 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2477 /* LOCK prefix is allowed only with some instructions */
2478 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2479 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2483 /* Privileged instruction can be executed only in CPL=0 */
2484 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2485 kvm_inject_gp(ctxt->vcpu, 0);
2489 if (c->rep_prefix && (c->d & String)) {
2490 ctxt->restart = true;
2491 /* All REP prefixes have the same first termination condition */
2492 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2494 ctxt->restart = false;
2495 kvm_rip_write(ctxt->vcpu, c->eip);
2498 /* The second termination condition only applies for REPE
2499 * and REPNE. Test if the repeat string operation prefix is
2500 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
2501 * corresponding termination condition according to:
2502 * - if REPE/REPZ and ZF = 0 then done
2503 * - if REPNE/REPNZ and ZF = 1 then done
2505 if ((c->b == 0xa6) || (c->b == 0xa7) ||
2506 (c->b == 0xae) || (c->b == 0xaf)) {
2507 if ((c->rep_prefix == REPE_PREFIX) &&
2508 ((ctxt->eflags & EFLG_ZF) == 0))
2510 if ((c->rep_prefix == REPNE_PREFIX) &&
2511 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
2517 if (c->src.type == OP_MEM) {
2518 rc = ops->read_emulated((unsigned long)c->src.ptr,
2522 if (rc != X86EMUL_CONTINUE)
2524 c->src.orig_val = c->src.val;
2527 if (c->src2.type == OP_MEM) {
2528 rc = ops->read_emulated((unsigned long)c->src2.ptr,
2532 if (rc != X86EMUL_CONTINUE)
2536 if ((c->d & DstMask) == ImplicitOps)
2540 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
2541 /* optimisation - avoid slow emulated read if Mov */
2542 rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
2543 c->dst.bytes, ctxt->vcpu);
2544 if (rc != X86EMUL_CONTINUE)
2547 c->dst.orig_val = c->dst.val;
2557 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
2559 case 0x06: /* push es */
2560 emulate_push_sreg(ctxt, VCPU_SREG_ES);
2562 case 0x07: /* pop es */
2563 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
2564 if (rc != X86EMUL_CONTINUE)
2569 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2571 case 0x0e: /* push cs */
2572 emulate_push_sreg(ctxt, VCPU_SREG_CS);
2576 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
2578 case 0x16: /* push ss */
2579 emulate_push_sreg(ctxt, VCPU_SREG_SS);
2581 case 0x17: /* pop ss */
2582 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
2583 if (rc != X86EMUL_CONTINUE)
2588 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
2590 case 0x1e: /* push ds */
2591 emulate_push_sreg(ctxt, VCPU_SREG_DS);
2593 case 0x1f: /* pop ds */
2594 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
2595 if (rc != X86EMUL_CONTINUE)
2600 emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
2604 emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
2608 emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
2612 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2614 case 0x40 ... 0x47: /* inc r16/r32 */
2615 emulate_1op("inc", c->dst, ctxt->eflags);
2617 case 0x48 ... 0x4f: /* dec r16/r32 */
2618 emulate_1op("dec", c->dst, ctxt->eflags);
2620 case 0x50 ... 0x57: /* push reg */
2623 case 0x58 ... 0x5f: /* pop reg */
2625 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
2626 if (rc != X86EMUL_CONTINUE)
2629 case 0x60: /* pusha */
2630 emulate_pusha(ctxt);
2632 case 0x61: /* popa */
2633 rc = emulate_popa(ctxt, ops);
2634 if (rc != X86EMUL_CONTINUE)
2637 case 0x63: /* movsxd */
2638 if (ctxt->mode != X86EMUL_MODE_PROT64)
2639 goto cannot_emulate;
2640 c->dst.val = (s32) c->src.val;
2642 case 0x68: /* push imm */
2643 case 0x6a: /* push imm8 */
2646 case 0x6c: /* insb */
2647 case 0x6d: /* insw/insd */
2648 c->dst.bytes = min(c->dst.bytes, 4u);
2649 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2651 kvm_inject_gp(ctxt->vcpu, 0);
2654 if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
2655 c->regs[VCPU_REGS_RDX], &c->dst.val))
2656 goto done; /* IO is needed, skip writeback */
2658 case 0x6e: /* outsb */
2659 case 0x6f: /* outsw/outsd */
2660 c->src.bytes = min(c->src.bytes, 4u);
2661 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2663 kvm_inject_gp(ctxt->vcpu, 0);
2666 ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
2667 &c->src.val, 1, ctxt->vcpu);
2669 c->dst.type = OP_NONE; /* nothing to writeback */
2671 case 0x70 ... 0x7f: /* jcc (short) */
2672 if (test_cc(c->b, ctxt->eflags))
2673 jmp_rel(c, c->src.val);
2675 case 0x80 ... 0x83: /* Grp1 */
2676 switch (c->modrm_reg) {
2696 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
2698 case 0x86 ... 0x87: /* xchg */
2700 /* Write back the register source. */
2701 switch (c->dst.bytes) {
2703 *(u8 *) c->src.ptr = (u8) c->dst.val;
2706 *(u16 *) c->src.ptr = (u16) c->dst.val;
2709 *c->src.ptr = (u32) c->dst.val;
2710 break; /* 64b reg: zero-extend */
2712 *c->src.ptr = c->dst.val;
2716 * Write back the memory destination with implicit LOCK
2719 c->dst.val = c->src.val;
2722 case 0x88 ... 0x8b: /* mov */
2724 case 0x8c: { /* mov r/m, sreg */
2725 struct kvm_segment segreg;
2727 if (c->modrm_reg <= VCPU_SREG_GS)
2728 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
2730 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2733 c->dst.val = segreg.selector;
2736 case 0x8d: /* lea r16/r32, m */
2737 c->dst.val = c->modrm_ea;
2739 case 0x8e: { /* mov seg, r/m16 */
2744 if (c->modrm_reg == VCPU_SREG_CS ||
2745 c->modrm_reg > VCPU_SREG_GS) {
2746 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2750 if (c->modrm_reg == VCPU_SREG_SS)
2751 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);
2753 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
2755 c->dst.type = OP_NONE; /* Disable writeback. */
2758 case 0x8f: /* pop (sole member of Grp1a) */
2759 rc = emulate_grp1a(ctxt, ops);
2760 if (rc != X86EMUL_CONTINUE)
2763 case 0x90: /* nop / xchg r8,rax */
2764 if (!(c->rex_prefix & 1)) { /* nop */
2765 c->dst.type = OP_NONE;
2768 case 0x91 ... 0x97: /* xchg reg,rax */
2769 c->src.type = c->dst.type = OP_REG;
2770 c->src.bytes = c->dst.bytes = c->op_bytes;
2771 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2772 c->src.val = *(c->src.ptr);
2774 case 0x9c: /* pushf */
2775 c->src.val = (unsigned long) ctxt->eflags;
2778 case 0x9d: /* popf */
2779 c->dst.type = OP_REG;
2780 c->dst.ptr = (unsigned long *) &ctxt->eflags;
2781 c->dst.bytes = c->op_bytes;
2782 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2783 if (rc != X86EMUL_CONTINUE)
2786 case 0xa0 ... 0xa1: /* mov */
2787 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2788 c->dst.val = c->src.val;
2790 case 0xa2 ... 0xa3: /* mov */
2791 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2793 case 0xa4 ... 0xa5: /* movs */
2795 case 0xa6 ... 0xa7: /* cmps */
2796 c->dst.type = OP_NONE; /* Disable writeback. */
2797 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2799 case 0xaa ... 0xab: /* stos */
2800 c->dst.val = c->regs[VCPU_REGS_RAX];
2802 case 0xac ... 0xad: /* lods */
2804 case 0xae ... 0xaf: /* scas */
2805 DPRINTF("Urk! I don't handle SCAS.\n");
2806 goto cannot_emulate;
2807 case 0xb0 ... 0xbf: /* mov r, imm */
2812 case 0xc3: /* ret */
2813 c->dst.type = OP_REG;
2814 c->dst.ptr = &c->eip;
2815 c->dst.bytes = c->op_bytes;
2816 goto pop_instruction;
2817 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
2819 c->dst.val = c->src.val;
2821 case 0xcb: /* ret far */
2822 rc = emulate_ret_far(ctxt, ops);
2823 if (rc != X86EMUL_CONTINUE)
2826 case 0xd0 ... 0xd1: /* Grp2 */
2830 case 0xd2 ... 0xd3: /* Grp2 */
2831 c->src.val = c->regs[VCPU_REGS_RCX];
2834 case 0xe4: /* inb */
2837 case 0xe6: /* outb */
2838 case 0xe7: /* out */
2840 case 0xe8: /* call (near) */ {
2841 long int rel = c->src.val;
2842 c->src.val = (unsigned long) c->eip;
2847 case 0xe9: /* jmp rel */
2849 case 0xea: /* jmp far */
2851 if (load_segment_descriptor(ctxt, ops, c->src2.val,
2855 c->eip = c->src.val;
2858 jmp: /* jmp rel short */
2859 jmp_rel(c, c->src.val);
2860 c->dst.type = OP_NONE; /* Disable writeback. */
2862 case 0xec: /* in al,dx */
2863 case 0xed: /* in (e/r)ax,dx */
2864 c->src.val = c->regs[VCPU_REGS_RDX];
2866 c->dst.bytes = min(c->dst.bytes, 4u);
2867 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2868 kvm_inject_gp(ctxt->vcpu, 0);
2871 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
2873 goto done; /* IO is needed */
2875 case 0xee: /* out al,dx */
2876 case 0xef: /* out (e/r)ax,dx */
2877 c->src.val = c->regs[VCPU_REGS_RDX];
2879 c->dst.bytes = min(c->dst.bytes, 4u);
2880 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2881 kvm_inject_gp(ctxt->vcpu, 0);
2884 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
2886 c->dst.type = OP_NONE; /* Disable writeback. */
2888 case 0xf4: /* hlt */
2889 ctxt->vcpu->arch.halt_request = 1;
2891 case 0xf5: /* cmc */
2892 /* complement carry flag from eflags reg */
2893 ctxt->eflags ^= EFLG_CF;
2894 c->dst.type = OP_NONE; /* Disable writeback. */
2896 case 0xf6 ... 0xf7: /* Grp3 */
2897 if (!emulate_grp3(ctxt, ops))
2898 goto cannot_emulate;
2900 case 0xf8: /* clc */
2901 ctxt->eflags &= ~EFLG_CF;
2902 c->dst.type = OP_NONE; /* Disable writeback. */
2904 case 0xfa: /* cli */
2905 if (emulator_bad_iopl(ctxt, ops))
2906 kvm_inject_gp(ctxt->vcpu, 0);
2908 ctxt->eflags &= ~X86_EFLAGS_IF;
2909 c->dst.type = OP_NONE; /* Disable writeback. */
2912 case 0xfb: /* sti */
2913 if (emulator_bad_iopl(ctxt, ops))
2914 kvm_inject_gp(ctxt->vcpu, 0);
2916 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
2917 ctxt->eflags |= X86_EFLAGS_IF;
2918 c->dst.type = OP_NONE; /* Disable writeback. */
2921 case 0xfc: /* cld */
2922 ctxt->eflags &= ~EFLG_DF;
2923 c->dst.type = OP_NONE; /* Disable writeback. */
2925 case 0xfd: /* std */
2926 ctxt->eflags |= EFLG_DF;
2927 c->dst.type = OP_NONE; /* Disable writeback. */
2929 case 0xfe: /* Grp4 */
2931 rc = emulate_grp45(ctxt, ops);
2932 if (rc != X86EMUL_CONTINUE)
2935 case 0xff: /* Grp5 */
2936 if (c->modrm_reg == 5)
2942 rc = writeback(ctxt, ops);
2943 if (rc != X86EMUL_CONTINUE)
2947 * restore dst type in case the decoding will be reused
2948 * (happens for string instructions)
2950 c->dst.type = saved_dst_type;
2952 if ((c->d & SrcMask) == SrcSI)
2953 string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
2956 if ((c->d & DstMask) == DstDI)
2957 string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);
2959 if (c->rep_prefix && (c->d & String)) {
2960 struct read_cache *rc = &ctxt->decode.io_read;
2961 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
2963 * Re-enter guest when pio read ahead buffer is empty or,
2964 * if it is not used, after every 1024 iterations.
2966 if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
2967 (rc->end != 0 && rc->end == rc->pos))
2968 ctxt->restart = false;
2971 /* Commit shadow register state. */
2972 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2973 kvm_rip_write(ctxt->vcpu, c->eip);
2974 ops->set_rflags(ctxt->vcpu, ctxt->eflags);
2977 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2981 case 0x01: /* lgdt, lidt, lmsw */
2982 switch (c->modrm_reg) {
2984 unsigned long address;
2986 case 0: /* vmcall */
2987 if (c->modrm_mod != 3 || c->modrm_rm != 1)
2988 goto cannot_emulate;
2990 rc = kvm_fix_hypercall(ctxt->vcpu);
2991 if (rc != X86EMUL_CONTINUE)
2994 /* Let the processor re-execute the fixed hypercall */
2996 /* Disable writeback. */
2997 c->dst.type = OP_NONE;
3000 rc = read_descriptor(ctxt, ops, c->src.ptr,
3001 &size, &address, c->op_bytes);
3002 if (rc != X86EMUL_CONTINUE)
3004 realmode_lgdt(ctxt->vcpu, size, address);
3005 /* Disable writeback. */
3006 c->dst.type = OP_NONE;
3008 case 3: /* lidt/vmmcall */
3009 if (c->modrm_mod == 3) {
3010 switch (c->modrm_rm) {
3012 rc = kvm_fix_hypercall(ctxt->vcpu);
3013 if (rc != X86EMUL_CONTINUE)
3017 goto cannot_emulate;
3020 rc = read_descriptor(ctxt, ops, c->src.ptr,
3023 if (rc != X86EMUL_CONTINUE)
3025 realmode_lidt(ctxt->vcpu, size, address);
3027 /* Disable writeback. */
3028 c->dst.type = OP_NONE;
3032 c->dst.val = ops->get_cr(0, ctxt->vcpu);
3035 ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
3036 (c->src.val & 0x0f), ctxt->vcpu);
3037 c->dst.type = OP_NONE;
3039 case 5: /* not defined */
3040 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3043 emulate_invlpg(ctxt->vcpu, c->modrm_ea);
3044 /* Disable writeback. */
3045 c->dst.type = OP_NONE;
3048 goto cannot_emulate;
3051 case 0x05: /* syscall */
3052 rc = emulate_syscall(ctxt);
3053 if (rc != X86EMUL_CONTINUE)
3059 emulate_clts(ctxt->vcpu);
3060 c->dst.type = OP_NONE;
3062 case 0x08: /* invd */
3063 case 0x09: /* wbinvd */
3064 case 0x0d: /* GrpP (prefetch) */
3065 case 0x18: /* Grp16 (prefetch/nop) */
3066 c->dst.type = OP_NONE;
3068 case 0x20: /* mov cr, reg */
3069 switch (c->modrm_reg) {
3073 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3076 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3077 c->dst.type = OP_NONE; /* no writeback */
3079 case 0x21: /* mov from dr to reg */
3080 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3081 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3082 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3085 emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
3086 c->dst.type = OP_NONE; /* no writeback */
3088 case 0x22: /* mov reg, cr */
3089 ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
3090 c->dst.type = OP_NONE;
3092 case 0x23: /* mov from reg to dr */
3093 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3094 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3095 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3098 emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
3099 c->dst.type = OP_NONE; /* no writeback */
3103 msr_data = (u32)c->regs[VCPU_REGS_RAX]
3104 | ((u64)c->regs[VCPU_REGS_RDX] << 32);
3105 if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3106 kvm_inject_gp(ctxt->vcpu, 0);
3109 rc = X86EMUL_CONTINUE;
3110 c->dst.type = OP_NONE;
3114 if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3115 kvm_inject_gp(ctxt->vcpu, 0);
3118 c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3119 c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3121 rc = X86EMUL_CONTINUE;
3122 c->dst.type = OP_NONE;
3124 case 0x34: /* sysenter */
3125 rc = emulate_sysenter(ctxt);
3126 if (rc != X86EMUL_CONTINUE)
3131 case 0x35: /* sysexit */
3132 rc = emulate_sysexit(ctxt);
3133 if (rc != X86EMUL_CONTINUE)
3138 case 0x40 ... 0x4f: /* cmov */
3139 c->dst.val = c->dst.orig_val = c->src.val;
3140 if (!test_cc(c->b, ctxt->eflags))
3141 c->dst.type = OP_NONE; /* no writeback */
3143 case 0x80 ... 0x8f: /* jnz rel, etc*/
3144 if (test_cc(c->b, ctxt->eflags))
3145 jmp_rel(c, c->src.val);
3146 c->dst.type = OP_NONE;
3148 case 0xa0: /* push fs */
3149 emulate_push_sreg(ctxt, VCPU_SREG_FS);
3151 case 0xa1: /* pop fs */
3152 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3153 if (rc != X86EMUL_CONTINUE)
3158 c->dst.type = OP_NONE;
3159 /* only subword offset */
3160 c->src.val &= (c->dst.bytes << 3) - 1;
3161 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3163 case 0xa4: /* shld imm8, r, r/m */
3164 case 0xa5: /* shld cl, r, r/m */
3165 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3167 case 0xa8: /* push gs */
3168 emulate_push_sreg(ctxt, VCPU_SREG_GS);
3170 case 0xa9: /* pop gs */
3171 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3172 if (rc != X86EMUL_CONTINUE)
3177 /* only subword offset */
3178 c->src.val &= (c->dst.bytes << 3) - 1;
3179 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3181 case 0xac: /* shrd imm8, r, r/m */
3182 case 0xad: /* shrd cl, r, r/m */
3183 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3185 case 0xae: /* clflush */
3187 case 0xb0 ... 0xb1: /* cmpxchg */
3189 * Save real source value, then compare EAX against
3192 c->src.orig_val = c->src.val;
3193 c->src.val = c->regs[VCPU_REGS_RAX];
3194 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3195 if (ctxt->eflags & EFLG_ZF) {
3196 /* Success: write back to memory. */
3197 c->dst.val = c->src.orig_val;
3199 /* Failure: write the value we saw to EAX. */
3200 c->dst.type = OP_REG;
3201 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3206 /* only subword offset */
3207 c->src.val &= (c->dst.bytes << 3) - 1;
3208 emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3210 case 0xb6 ... 0xb7: /* movzx */
3211 c->dst.bytes = c->op_bytes;
3212 c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3215 case 0xba: /* Grp8 */
3216 switch (c->modrm_reg & 3) {
3229 /* only subword offset */
3230 c->src.val &= (c->dst.bytes << 3) - 1;
3231 emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3233 case 0xbe ... 0xbf: /* movsx */
3234 c->dst.bytes = c->op_bytes;
3235 c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3238 case 0xc3: /* movnti */
3239 c->dst.bytes = c->op_bytes;
3240 c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3243 case 0xc7: /* Grp9 (cmpxchg8b) */
3244 rc = emulate_grp9(ctxt, ops);
3245 if (rc != X86EMUL_CONTINUE)
3252 DPRINTF("Cannot emulate %02x\n", c->b);