2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
40 * "current" is in register %ebx during any slow entries.
43 #include <linux/linkage.h>
44 #include <asm/thread_info.h>
45 #include <asm/irqflags.h>
46 #include <asm/errno.h>
47 #include <asm/segment.h>
51 #include <asm/dwarf2.h>
52 #include "irq_vectors.h"
54 #define nr_syscalls ((syscall_table_size)/4)
79 /* These are replacements for paravirtualization */
80 #define DISABLE_INTERRUPTS cli
81 #define ENABLE_INTERRUPTS sti
82 #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
83 #define INTERRUPT_RETURN iret
84 #define GET_CR0_INTO_EAX movl %cr0, %eax
87 #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
90 #define resume_kernel restore_nocheck
93 .macro TRACE_IRQS_IRET
94 #ifdef CONFIG_TRACE_IRQFLAGS
95 testl $IF_MASK,EFLAGS(%esp) # interrupts off?
103 #define resume_userspace_sig check_userspace
105 #define resume_userspace_sig resume_userspace
111 CFI_ADJUST_CFA_OFFSET 4;\
112 /*CFI_REL_OFFSET es, 0;*/\
114 CFI_ADJUST_CFA_OFFSET 4;\
115 /*CFI_REL_OFFSET ds, 0;*/\
117 CFI_ADJUST_CFA_OFFSET 4;\
118 CFI_REL_OFFSET eax, 0;\
120 CFI_ADJUST_CFA_OFFSET 4;\
121 CFI_REL_OFFSET ebp, 0;\
123 CFI_ADJUST_CFA_OFFSET 4;\
124 CFI_REL_OFFSET edi, 0;\
126 CFI_ADJUST_CFA_OFFSET 4;\
127 CFI_REL_OFFSET esi, 0;\
129 CFI_ADJUST_CFA_OFFSET 4;\
130 CFI_REL_OFFSET edx, 0;\
132 CFI_ADJUST_CFA_OFFSET 4;\
133 CFI_REL_OFFSET ecx, 0;\
135 CFI_ADJUST_CFA_OFFSET 4;\
136 CFI_REL_OFFSET ebx, 0;\
137 movl $(__USER_DS), %edx; \
141 #define RESTORE_INT_REGS \
143 CFI_ADJUST_CFA_OFFSET -4;\
146 CFI_ADJUST_CFA_OFFSET -4;\
149 CFI_ADJUST_CFA_OFFSET -4;\
152 CFI_ADJUST_CFA_OFFSET -4;\
155 CFI_ADJUST_CFA_OFFSET -4;\
158 CFI_ADJUST_CFA_OFFSET -4;\
161 CFI_ADJUST_CFA_OFFSET -4;\
164 #define RESTORE_REGS \
167 CFI_ADJUST_CFA_OFFSET -4;\
170 CFI_ADJUST_CFA_OFFSET -4;\
172 .section .fixup,"ax"; \
178 .section __ex_table,"a";\
184 #define RING0_INT_FRAME \
185 CFI_STARTPROC simple;\
186 CFI_DEF_CFA esp, 3*4;\
187 /*CFI_OFFSET cs, -2*4;*/\
190 #define RING0_EC_FRAME \
191 CFI_STARTPROC simple;\
192 CFI_DEF_CFA esp, 4*4;\
193 /*CFI_OFFSET cs, -2*4;*/\
196 #define RING0_PTREGS_FRAME \
197 CFI_STARTPROC simple;\
198 CFI_DEF_CFA esp, OLDESP-EBX;\
199 /*CFI_OFFSET cs, CS-OLDESP;*/\
200 CFI_OFFSET eip, EIP-OLDESP;\
201 /*CFI_OFFSET es, ES-OLDESP;*/\
202 /*CFI_OFFSET ds, DS-OLDESP;*/\
203 CFI_OFFSET eax, EAX-OLDESP;\
204 CFI_OFFSET ebp, EBP-OLDESP;\
205 CFI_OFFSET edi, EDI-OLDESP;\
206 CFI_OFFSET esi, ESI-OLDESP;\
207 CFI_OFFSET edx, EDX-OLDESP;\
208 CFI_OFFSET ecx, ECX-OLDESP;\
209 CFI_OFFSET ebx, EBX-OLDESP
214 CFI_ADJUST_CFA_OFFSET 4
216 GET_THREAD_INFO(%ebp)
218 CFI_ADJUST_CFA_OFFSET -4
219 pushl $0x0202 # Reset kernel eflags
220 CFI_ADJUST_CFA_OFFSET 4
222 CFI_ADJUST_CFA_OFFSET -4
227 * Return to user mode is not as complex as all this looks,
228 * but we want the default path for a system call return to
229 * go as quickly as possible which is why some of this is
230 * less clear than it otherwise should be.
233 # userspace resumption stub bypassing syscall exit tracing
239 GET_THREAD_INFO(%ebp)
241 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
243 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
245 jb resume_kernel # not returning to v8086 or userspace
246 ENTRY(resume_userspace)
247 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
248 # setting need_resched or sigpending
249 # between sampling and the iret
250 movl TI_flags(%ebp), %ecx
251 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
252 # int/exception return?
256 #ifdef CONFIG_PREEMPT
259 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
262 movl TI_flags(%ebp), %ecx # need_resched set ?
263 testb $_TIF_NEED_RESCHED, %cl
265 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
267 call preempt_schedule_irq
272 /* SYSENTER_RETURN points to after the "sysenter" instruction in
273 the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */
275 # sysenter call handler stub
276 ENTRY(sysenter_entry)
279 CFI_REGISTER esp, ebp
280 movl TSS_sysenter_esp0(%esp),%esp
283 * No need to follow this irqs on/off section: the syscall
284 * disabled irqs and here we enable them straight after entry:
288 CFI_ADJUST_CFA_OFFSET 4
289 /*CFI_REL_OFFSET ss, 0*/
291 CFI_ADJUST_CFA_OFFSET 4
292 CFI_REL_OFFSET esp, 0
294 CFI_ADJUST_CFA_OFFSET 4
296 CFI_ADJUST_CFA_OFFSET 4
297 /*CFI_REL_OFFSET cs, 0*/
299 * Push current_thread_info()->sysenter_return to the stack.
300 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
301 * pushed above; +8 corresponds to copy_thread's esp0 setting.
303 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
304 CFI_ADJUST_CFA_OFFSET 4
305 CFI_REL_OFFSET eip, 0
308 * Load the potential sixth argument from user stack.
309 * Careful about security.
311 cmpl $__PAGE_OFFSET-3,%ebp
314 .section __ex_table,"a"
316 .long 1b,syscall_fault
320 CFI_ADJUST_CFA_OFFSET 4
322 GET_THREAD_INFO(%ebp)
324 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
325 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
326 jnz syscall_trace_entry
327 cmpl $(nr_syscalls), %eax
329 call *sys_call_table(,%eax,4)
333 movl TI_flags(%ebp), %ecx
334 testw $_TIF_ALLWORK_MASK, %cx
335 jne syscall_exit_work
336 /* if something modifies registers it must also disable sysexit */
338 movl OLDESP(%esp), %ecx
341 ENABLE_INTERRUPTS_SYSEXIT
345 # system call handler stub
347 RING0_INT_FRAME # can't unwind into user space anyway
348 pushl %eax # save orig_eax
349 CFI_ADJUST_CFA_OFFSET 4
351 GET_THREAD_INFO(%ebp)
352 testl $TF_MASK,EFLAGS(%esp)
354 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
356 # system call tracing in operation / emulation
357 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
358 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
359 jnz syscall_trace_entry
360 cmpl $(nr_syscalls), %eax
363 call *sys_call_table(,%eax,4)
364 movl %eax,EAX(%esp) # store the return value
366 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
367 # setting need_resched or sigpending
368 # between sampling and the iret
370 movl TI_flags(%ebp), %ecx
371 testw $_TIF_ALLWORK_MASK, %cx # current->work
372 jne syscall_exit_work
375 movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
376 # Warning: OLDSS(%esp) contains the wrong/random values if we
377 # are returning to the kernel.
378 # See comments in process.c:copy_thread() for details.
379 movb OLDSS(%esp), %ah
381 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
382 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
384 je ldt_ss # returning to user-space with LDT SS
387 restore_nocheck_notrace:
390 CFI_ADJUST_CFA_OFFSET -4
396 pushl $0 # no error code
400 .section __ex_table,"a"
407 larl OLDSS(%esp), %eax
409 testl $0x00400000, %eax # returning to 32bit stack?
410 jnz restore_nocheck # all right, normal return
411 /* If returning to userspace with 16bit stack,
412 * try to fix the higher word of ESP, as the CPU
414 * This is an "official" bug of all the x86-compatible
415 * CPUs, which we can try to work around to make
416 * dosemu and wine happy. */
417 subl $8, %esp # reserve space for switch16 pointer
418 CFI_ADJUST_CFA_OFFSET 8
422 /* Set up the 16bit stack frame with switch32 pointer on top,
423 * and a switch16 pointer on top of the current frame. */
424 call setup_x86_bogus_stack
425 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
428 lss 20+4(%esp), %esp # switch to 16bit stack
430 .section __ex_table,"a"
436 # perform work that needs to be done immediately before resumption
438 RING0_PTREGS_FRAME # can't unwind into user space anyway
440 testb $_TIF_NEED_RESCHED, %cl
444 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
445 # setting need_resched or sigpending
446 # between sampling and the iret
448 movl TI_flags(%ebp), %ecx
449 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
450 # than syscall tracing?
452 testb $_TIF_NEED_RESCHED, %cl
455 work_notifysig: # deal with pending signals and
456 # notify-resume requests
457 testl $VM_MASK, EFLAGS(%esp)
459 jne work_notifysig_v86 # returning to kernel-space or
462 call do_notify_resume
463 jmp resume_userspace_sig
468 pushl %ecx # save ti_flags for do_notify_resume
469 CFI_ADJUST_CFA_OFFSET 4
470 call save_v86_state # %eax contains pt_regs pointer
472 CFI_ADJUST_CFA_OFFSET -4
475 call do_notify_resume
476 jmp resume_userspace_sig
479 # perform syscall entry tracing
482 movl $-ENOSYS,EAX(%esp)
485 call do_syscall_trace
487 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
488 # so must skip actual syscall
489 movl ORIG_EAX(%esp), %eax
490 cmpl $(nr_syscalls), %eax
494 # perform syscall exit tracing
497 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
500 ENABLE_INTERRUPTS # could let do_syscall_trace() call
504 call do_syscall_trace
508 RING0_INT_FRAME # can't unwind into user space anyway
510 pushl %eax # save orig_eax
511 CFI_ADJUST_CFA_OFFSET 4
513 GET_THREAD_INFO(%ebp)
514 movl $-EFAULT,EAX(%esp)
518 movl $-ENOSYS,EAX(%esp)
522 #define FIXUP_ESPFIX_STACK \
524 /* switch to 32bit stack using the pointer on top of 16bit stack */ \
525 lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
526 /* copy data from 16bit stack to 32bit stack */ \
527 call fixup_x86_bogus_stack; \
528 /* put ESP to the proper location */ \
530 #define UNWIND_ESPFIX_STACK \
532 CFI_ADJUST_CFA_OFFSET 4; \
534 /* see if on 16bit stack */ \
535 cmpw $__ESPFIX_SS, %ax; \
538 CFI_ADJUST_CFA_OFFSET -4; \
539 .section .fixup,"ax"; \
540 28: movl $__KERNEL_DS, %eax; \
543 /* switch to 32bit stack */ \
544 FIXUP_ESPFIX_STACK; \
549 * Build the entry stubs and pointer table with
550 * some assembler magic.
557 ENTRY(irq_entries_start)
562 CFI_ADJUST_CFA_OFFSET -4
565 CFI_ADJUST_CFA_OFFSET 4
574 * the CPU automatically disables interrupts when executing an IRQ vector,
575 * so IRQ-flags tracing has to follow that:
586 #define BUILD_INTERRUPT(name, nr) \
590 CFI_ADJUST_CFA_OFFSET 4; \
598 /* The include is where all of the SMP etc. interrupts come from */
599 #include "entry_arch.h"
601 KPROBE_ENTRY(page_fault)
604 CFI_ADJUST_CFA_OFFSET 4
608 CFI_ADJUST_CFA_OFFSET 4
609 /*CFI_REL_OFFSET ds, 0*/
611 CFI_ADJUST_CFA_OFFSET 4
612 CFI_REL_OFFSET eax, 0
615 CFI_ADJUST_CFA_OFFSET 4
616 CFI_REL_OFFSET ebp, 0
618 CFI_ADJUST_CFA_OFFSET 4
619 CFI_REL_OFFSET edi, 0
621 CFI_ADJUST_CFA_OFFSET 4
622 CFI_REL_OFFSET esi, 0
624 CFI_ADJUST_CFA_OFFSET 4
625 CFI_REL_OFFSET edx, 0
628 CFI_ADJUST_CFA_OFFSET 4
629 CFI_REL_OFFSET ecx, 0
631 CFI_ADJUST_CFA_OFFSET 4
632 CFI_REL_OFFSET ebx, 0
635 CFI_ADJUST_CFA_OFFSET 4
636 /*CFI_REL_OFFSET es, 0*/
639 CFI_ADJUST_CFA_OFFSET -4
640 /*CFI_REGISTER es, ecx*/
641 movl ES(%esp), %edi # get the function address
642 movl ORIG_EAX(%esp), %edx # get the error code
643 movl %eax, ORIG_EAX(%esp)
645 /*CFI_REL_OFFSET es, ES*/
646 movl $(__USER_DS), %ecx
649 movl %esp,%eax # pt_regs pointer
651 jmp ret_from_exception
653 KPROBE_END(page_fault)
655 ENTRY(coprocessor_error)
658 CFI_ADJUST_CFA_OFFSET 4
659 pushl $do_coprocessor_error
660 CFI_ADJUST_CFA_OFFSET 4
664 ENTRY(simd_coprocessor_error)
667 CFI_ADJUST_CFA_OFFSET 4
668 pushl $do_simd_coprocessor_error
669 CFI_ADJUST_CFA_OFFSET 4
673 ENTRY(device_not_available)
675 pushl $-1 # mark this as an int
676 CFI_ADJUST_CFA_OFFSET 4
679 testl $0x4, %eax # EM (math emulation bit)
680 jne device_not_available_emulate
682 call math_state_restore
683 jmp ret_from_exception
684 device_not_available_emulate:
685 pushl $0 # temporary storage for ORIG_EIP
686 CFI_ADJUST_CFA_OFFSET 4
689 CFI_ADJUST_CFA_OFFSET -4
690 jmp ret_from_exception
694 * Debug traps and NMI can happen at the one SYSENTER instruction
695 * that sets up the real kernel stack. Check here, since we can't
696 * allow the wrong stack to be used.
698 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
699 * already pushed 3 words if it hits on the sysenter instruction:
700 * eflags, cs and eip.
702 * We just load the right stack, and push the three (known) values
703 * by hand onto the new stack - while updating the return eip past
704 * the instruction that would have done it for sysenter.
706 #define FIX_STACK(offset, ok, label) \
707 cmpw $__KERNEL_CS,4(%esp); \
710 movl TSS_sysenter_esp0+offset(%esp),%esp; \
711 CFI_DEF_CFA esp, 0; \
714 CFI_ADJUST_CFA_OFFSET 4; \
715 pushl $__KERNEL_CS; \
716 CFI_ADJUST_CFA_OFFSET 4; \
717 pushl $sysenter_past_esp; \
718 CFI_ADJUST_CFA_OFFSET 4; \
719 CFI_REL_OFFSET eip, 0
723 cmpl $sysenter_entry,(%esp)
724 jne debug_stack_correct
725 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
727 pushl $-1 # mark this as an int
728 CFI_ADJUST_CFA_OFFSET 4
730 xorl %edx,%edx # error code 0
731 movl %esp,%eax # pt_regs pointer
733 jmp ret_from_exception
738 * NMI is doubly nasty. It can happen _while_ we're handling
739 * a debug fault, and the debug fault hasn't yet been able to
740 * clear up the stack. So we first check whether we got an
741 * NMI on the sysenter entry path, but after that we need to
742 * check whether we got an NMI on the debug path where the debug
743 * fault happened on the sysenter path.
748 CFI_ADJUST_CFA_OFFSET 4
750 cmpw $__ESPFIX_SS, %ax
752 CFI_ADJUST_CFA_OFFSET -4
754 cmpl $sysenter_entry,(%esp)
757 CFI_ADJUST_CFA_OFFSET 4
759 /* Do not access memory above the end of our stack page,
760 * it might not exist.
762 andl $(THREAD_SIZE-1),%eax
763 cmpl $(THREAD_SIZE-20),%eax
765 CFI_ADJUST_CFA_OFFSET -4
766 jae nmi_stack_correct
767 cmpl $sysenter_entry,12(%esp)
768 je nmi_debug_stack_check
770 /* We have a RING0_INT_FRAME here */
772 CFI_ADJUST_CFA_OFFSET 4
774 xorl %edx,%edx # zero error code
775 movl %esp,%eax # pt_regs pointer
777 jmp restore_nocheck_notrace
782 FIX_STACK(12,nmi_stack_correct, 1)
783 jmp nmi_stack_correct
785 nmi_debug_stack_check:
786 /* We have a RING0_INT_FRAME here */
787 cmpw $__KERNEL_CS,16(%esp)
788 jne nmi_stack_correct
791 cmpl $debug_esp_fix_insn,(%esp)
793 FIX_STACK(24,nmi_stack_correct, 1)
794 jmp nmi_stack_correct
797 /* We have a RING0_INT_FRAME here.
799 * create the pointer to lss back
802 CFI_ADJUST_CFA_OFFSET 4
804 CFI_ADJUST_CFA_OFFSET 4
807 /* copy the iret frame of 12 bytes */
810 CFI_ADJUST_CFA_OFFSET 4
813 CFI_ADJUST_CFA_OFFSET 4
815 FIXUP_ESPFIX_STACK # %eax == %esp
816 CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
817 xorl %edx,%edx # zero error code
820 lss 12+4(%esp), %esp # back to 16bit stack
823 .section __ex_table,"a"
831 pushl $-1 # mark this as an int
832 CFI_ADJUST_CFA_OFFSET 4
834 xorl %edx,%edx # zero error code
835 movl %esp,%eax # pt_regs pointer
837 jmp ret_from_exception
844 CFI_ADJUST_CFA_OFFSET 4
846 CFI_ADJUST_CFA_OFFSET 4
853 CFI_ADJUST_CFA_OFFSET 4
855 CFI_ADJUST_CFA_OFFSET 4
862 CFI_ADJUST_CFA_OFFSET 4
864 CFI_ADJUST_CFA_OFFSET 4
868 ENTRY(coprocessor_segment_overrun)
871 CFI_ADJUST_CFA_OFFSET 4
872 pushl $do_coprocessor_segment_overrun
873 CFI_ADJUST_CFA_OFFSET 4
879 pushl $do_invalid_TSS
880 CFI_ADJUST_CFA_OFFSET 4
884 ENTRY(segment_not_present)
886 pushl $do_segment_not_present
887 CFI_ADJUST_CFA_OFFSET 4
893 pushl $do_stack_segment
894 CFI_ADJUST_CFA_OFFSET 4
898 KPROBE_ENTRY(general_protection)
900 pushl $do_general_protection
901 CFI_ADJUST_CFA_OFFSET 4
904 KPROBE_END(general_protection)
906 ENTRY(alignment_check)
908 pushl $do_alignment_check
909 CFI_ADJUST_CFA_OFFSET 4
915 pushl $0 # no error code
916 CFI_ADJUST_CFA_OFFSET 4
917 pushl $do_divide_error
918 CFI_ADJUST_CFA_OFFSET 4
922 #ifdef CONFIG_X86_MCE
926 CFI_ADJUST_CFA_OFFSET 4
927 pushl machine_check_vector
928 CFI_ADJUST_CFA_OFFSET 4
933 ENTRY(spurious_interrupt_bug)
936 CFI_ADJUST_CFA_OFFSET 4
937 pushl $do_spurious_interrupt_bug
938 CFI_ADJUST_CFA_OFFSET 4
942 #ifdef CONFIG_STACK_UNWIND
943 ENTRY(arch_unwind_init_running)
956 movl $__USER_DS, DS(%edx)
957 movl $__USER_DS, ES(%edx)
958 movl %ebx, ORIG_EAX(%edx)
961 movl $__KERNEL_CS, CS(%edx)
962 movl %ebx, EFLAGS(%edx)
963 movl %eax, OLDESP(%edx)
967 movl $__KERNEL_DS, OLDSS(%edx)
970 ENDPROC(arch_unwind_init_running)
973 ENTRY(kernel_thread_helper)
974 pushl $0 # fake return address for unwinder
978 CFI_ADJUST_CFA_OFFSET 4
981 CFI_ADJUST_CFA_OFFSET 4
984 ENDPROC(kernel_thread_helper)
987 #include "syscall_table.S"
989 syscall_table_size=(.-sys_call_table)