#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
+#include <linux/ftrace.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
# define const_debug static const
#endif
+/**
+ * runqueue_is_locked
+ *
+ * Returns true if the current cpu runqueue is locked.
+ * This interface allows printk to be called with the runqueue lock
+ * held and know whether or not it is OK to wake up the klogd.
+ */
+int runqueue_is_locked(void)
+{
+ int cpu = get_cpu();
+ struct rq *rq = cpu_rq(cpu);
+ int ret;
+
+ ret = spin_is_locked(&rq->lock);
+ put_cpu();
+ return ret;
+}
+
/*
* Debugging: various feature bits
*/
p->se.last_wakeup = 0;
}
+ sched_info_dequeued(p);
p->sched_class->dequeue_task(rq, p, sleep);
p->se.on_rq = 0;
}
success = 1;
out_running:
+ trace_mark(kernel_sched_wakeup,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
+ trace_mark(kernel_sched_wakeup_new,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
+ trace_mark(kernel_sched_schedule,
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ prev->pid, next->pid, prev->state,
+ rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
#endif
}
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+ defined(CONFIG_PREEMPT_TRACER))
+
+static inline unsigned long get_parent_ip(unsigned long addr)
+{
+ if (in_lock_functions(addr)) {
+ addr = CALLER_ADDR2;
+ if (in_lock_functions(addr))
+ addr = CALLER_ADDR3;
+ }
+ return addr;
+}
void __kprobes add_preempt_count(int val)
{
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
return;
+#endif
preempt_count() += val;
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Spinlock count overflowing soon?
*/
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
+#endif
+ if (preempt_count() == val)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
void __kprobes sub_preempt_count(int val)
{
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
!(preempt_count() & PREEMPT_MASK)))
return;
+#endif
+ if (preempt_count() == val)
+ trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
set_load_weight(p);
}
-/**
- * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
- * @p: the task in question.
- * @policy: new policy.
- * @param: structure containing the new RT priority.
- *
- * NOTE that the task may be already dead.
- */
-int sched_setscheduler(struct task_struct *p, int policy,
- struct sched_param *param)
+static int __sched_setscheduler(struct task_struct *p, int policy,
+ struct sched_param *param, bool user)
{
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
/*
* Allow unprivileged RT tasks to decrease priority:
*/
- if (!capable(CAP_SYS_NICE)) {
+ if (user && !capable(CAP_SYS_NICE)) {
if (rt_policy(policy)) {
unsigned long rlim_rtprio;
* Do not allow realtime tasks into groups that have no runtime
* assigned.
*/
- if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+ if (user
+ && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
return -EPERM;
#endif
return 0;
}
+
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * NOTE that the task may be already dead.
+ */
+int sched_setscheduler(struct task_struct *p, int policy,
+ struct sched_param *param)
+{
+ return __sched_setscheduler(p, policy, param, true);
+}
EXPORT_SYMBOL_GPL(sched_setscheduler);
+/**
+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Just like sched_setscheduler, only don't bother checking if the
+ * current context has permission. For example, this is needed in
+ * stop_machine(): we create temporary high priority worker threads,
+ * but our caller might not have that capability.
+ */
+int sched_setscheduler_nocheck(struct task_struct *p, int policy,
+ struct sched_param *param)
+{
+ return __sched_setscheduler(p, policy, param, false);
+}
+
static int
do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
{
return retval;
}
-static const char stat_nam[] = "RSDTtZX";
+static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
void sched_show_task(struct task_struct *p)
{
double_rq_lock(rq_src, rq_dest);
/* Already moved. */
if (task_cpu(p) != src_cpu)
- goto out;
+ goto done;
/* Affinity changed (again). */
if (!cpu_isset(dest_cpu, p->cpus_allowed))
- goto out;
+ goto fail;
on_rq = p->se.on_rq;
if (on_rq)
activate_task(rq_dest, p, 0);
check_preempt_curr(rq_dest, p);
}
+done:
ret = 1;
-out:
+fail:
double_rq_unlock(rq_src, rq_dest);
return ret;
}
next = pick_next_task(rq, rq->curr);
if (!next)
break;
+ next->sched_class->put_prev_task(rq, next);
migrate_dead(dead_cpu, next);
}
min_val = INT_MAX;
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < nr_node_ids; i++) {
/* Start at @node */
- n = (node + i) % MAX_NUMNODES;
+ n = (node + i) % nr_node_ids;
if (!nr_cpus_node(n))
continue;
if (!sched_group_nodes)
continue;
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < nr_node_ids; i++) {
struct sched_group *oldsg, *sg = sched_group_nodes[i];
*nodemask = node_to_cpumask(i);
/*
* Allocate the per-node list of sched groups
*/
- sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *),
+ sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
GFP_KERNEL);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
#endif
/* Set up physical groups */
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < nr_node_ids; i++) {
SCHED_CPUMASK_VAR(nodemask, allmasks);
SCHED_CPUMASK_VAR(send_covered, allmasks);
send_covered, tmpmask);
}
- for (i = 0; i < MAX_NUMNODES; i++) {
+ for (i = 0; i < nr_node_ids; i++) {
/* Set up node groups */
struct sched_group *sg, *prev;
SCHED_CPUMASK_VAR(nodemask, allmasks);
cpus_or(*covered, *covered, *nodemask);
prev = sg;
- for (j = 0; j < MAX_NUMNODES; j++) {
+ for (j = 0; j < nr_node_ids; j++) {
SCHED_CPUMASK_VAR(notcovered, allmasks);
- int n = (i + j) % MAX_NUMNODES;
+ int n = (i + j) % nr_node_ids;
node_to_cpumask_ptr(pnodemask, n);
cpus_complement(*notcovered, *covered);
}
#ifdef CONFIG_NUMA
- for (i = 0; i < MAX_NUMNODES; i++)
+ for (i = 0; i < nr_node_ids; i++)
init_numa_sched_groups_power(sched_group_nodes[i]);
if (sd_allnodes) {
#endif
#ifdef CONFIG_SMP
- open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
+ open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
#endif
#ifdef CONFIG_RT_MUTEXES
rt_period = (u64)rt_period_us * NSEC_PER_USEC;
rt_runtime = tg->rt_bandwidth.rt_runtime;
+ if (rt_period == 0)
+ return -EINVAL;
+
return tg_set_bandwidth(tg, rt_period, rt_runtime);
}