1.7 What is sched_load_balance ?
--------------------------------
-The kernel scheduler (kernel/sched.c) automatically load balances
+The kernel scheduler (kernel/sched/core.c) automatically load balances
tasks. If one CPU is underutilized, kernel code running on that
CPU will look for tasks on other more overloaded CPUs and move those
tasks to itself, within the constraints of such placement mechanisms
__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
result does not equal the task's current priority, then rt_mutex_setprio
is called to adjust the priority of the task to the new priority.
-Note that rt_mutex_setprio is defined in kernel/sched.c to implement the
+Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
actual change in priority.
It is interesting to note that __rt_mutex_adjust_prio can either increase
load of each of its member CPUs, and only when the load of a group becomes
out of balance are tasks moved between groups.
-In kernel/sched.c, trigger_load_balance() is run periodically on each CPU
+In kernel/sched/core.c, trigger_load_balance() is run periodically on each CPU
through scheduler_tick(). It raises a softirq after the next regularly scheduled
rebalancing event for the current runqueue has arrived. The actual load
balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
the specifics and what to tune.
Architectures may retain the regular override the default SD_*_INIT flags
-while using the generic domain builder in kernel/sched.c if they wish to
+while using the generic domain builder in kernel/sched/core.c if they wish to
retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
can be done by #define'ing ARCH_HASH_SCHED_TUNE.
But when you do the write-lock, you have to use the irq-safe version.
For an example of being clever with rw-locks, see the "waitqueue_lock"
-handling in kernel/sched.c - nothing ever _changes_ a wait-queue from
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
within an interrupt, they only read the queue in order to know whom to
wake up. So read-locks are safe (which is good: they are very common
indeed), while write-locks need to protect themselves against interrupts.
at process_kern.c:156
#3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
at process_kern.c:161
- #4 0x10001d12 in schedule () at sched.c:777
+ #4 0x10001d12 in schedule () at core.c:777
#5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
#6 0x1006aa10 in __down_failed () at semaphore.c:157
#7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
at process_kern.c:161
161 _switch_to(prev, next);
(gdb)
- #4 0x10001d12 in schedule () at sched.c:777
+ #4 0x10001d12 in schedule () at core.c:777
777 switch_to(prev, next, prev);
(gdb)
#5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
* is actually quite ugly. It might be possible to
* determine the frame size automatically at build
* time by doing this:
- * - compile sched.c
+ * - compile sched/core.c
* - disassemble the resulting sched.o
* - look for 'sub sp,??' shortly after '<schedule>:'
*/
in another register:
! __asm__ ("swapnwbr %2\n\tlz %2,%0"
! : "=r,r" (res), "=r,X" (dummy) : "1,0" (w));
- confuses gcc (sched.c, gcc from cris-dist-1.14). */
+ confuses gcc (core.c, gcc from cris-dist-1.14). */
unsigned long res;
__asm__ ("swapnwbr %0 \n\t"
* Return a CPU-local timestamp in nano-seconds. This timestamp is
* NOT synchronized across CPUs its return value must never be
* compared against the values returned on another CPU. The usage in
- * kernel/sched.c ensures that.
+ * kernel/sched/core.c ensures that.
*
* The return-value of sched_clock() is NOT supposed to wrap-around.
* If it did, it would cause some scheduling hiccups (at the worst).
* FPU affinity with the user's requested processor affinity.
* This code is 98% identical with the sys_sched_setaffinity()
* and sys_sched_getaffinity() system calls, and should be
- * updated when kernel/sched.c changes.
+ * updated when kernel/sched/core.c changes.
*/
/*
* find_process_by_pid - find a process with a matching PID value.
- * used in sys_sched_set/getaffinity() in kernel/sched.c, so
+ * used in sys_sched_set/getaffinity() in kernel/sched/core.c, so
* cloned here.
*/
static inline struct task_struct *find_process_by_pid(pid_t pid)
/*
* For FPU affinity scheduling on MIPS MT processors, we need to
* intercept sys_sched_xxxaffinity() calls until we get a proper hook
- * in kernel/sched.c. Considered only temporary we only support these
- * hooks for the 32-bit kernel - there is no MIPS64 MT processor atm.
+ * in kernel/sched/core.c. Considered only temporary we only support
+ * these hooks for the 32-bit kernel - there is no MIPS64 MT processor
+ * atm.
*/
sys mipsmt_sys_sched_setaffinity 3
sys mipsmt_sys_sched_getaffinity 3
/*
* switch_mm is the entry point called from the architecture independent
- * code in kernel/sched.c
+ * code in kernel/sched/core.c
*/
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
/*
* Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
*/
#define thread_saved_pc(t) ((t)->thread.pc)
regs_to_pt_regs(®s, pc, lr, sp, r52));
}
-/* This is called only from kernel/sched.c, with esp == NULL */
+/* This is called only from kernel/sched/core.c, with esp == NULL */
void show_stack(struct task_struct *task, unsigned long *esp)
{
struct KBacktraceIterator kbt;
static const int kstack_depth_to_print = 24;
/* This recently started being used in arch-independent code too, as in
- * kernel/sched.c.*/
+ * kernel/sched/core.c.*/
void show_stack(struct task_struct *task, unsigned long *esp)
{
unsigned long *stack;
* (C) Copyright 2001 Linus Torvalds
*
* Atomic wait-for-completion handler data structures.
- * See kernel/sched.c for details.
+ * See kernel/sched/core.c for details.
*/
#include <linux/wait.h>
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
/*
- * This has to have a higher priority than migration_notifier in sched.c.
+ * This has to have a higher priority than migration_notifier in sched/core.c.
*/
#define perf_cpu_notifier(fn) \
do { \
#else /* DEBUG_SPINLOCK */
#define arch_spin_is_locked(lock) ((void)(lock), 0)
-/* for sched.c and kernel_lock.c: */
+/* for sched/core.c and kernel_lock.c: */
# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0)
# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0)
# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0)
#define __NR_ptrace 117
__SYSCALL(__NR_ptrace, sys_ptrace)
-/* kernel/sched.c */
+/* kernel/sched/core.c */
#define __NR_sched_setparam 118
__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
#define __NR_sched_setscheduler 119
* This function builds a partial partition of the systems CPUs
* A 'partial partition' is a set of non-overlapping subsets whose
* union is a subset of that set.
- * The output of this function needs to be passed to kernel/sched.c
+ * The output of this function needs to be passed to kernel/sched/core.c
* partition_sched_domains() routine, which will rebuild the scheduler's
* load balancing domains (sched domains) as specified by that partial
* partition.
* is a subset of one of these domains, while there are as
* many such domains as possible, each as small as possible.
* doms - Conversion of 'csa' to an array of cpumasks, for passing to
- * the kernel/sched.c routine partition_sched_domains() in a
+ * the kernel/sched/core.c routine partition_sched_domains() in a
* convenient format, that can be easily compared to the prior
* value to determine what partition elements (sched domains)
* were changed (added or removed.)
* Modification history kernel/time.c
*
* 1993-09-02 Philip Gladstone
- * Created file with time related functions from sched.c and adjtimex()
+ * Created file with time related functions from sched/core.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
* 1995-08-13 Torsten Duwe
/*
* Scheduler hooks for concurrency managed workqueue. Only to be used from
- * sched.c and workqueue.c.
+ * sched/core.c and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);