sched: add new API sched_setscheduler_nocheck: add a flag to control access checks
authorRusty Russell <rusty@rustcorp.com.au>
Mon, 23 Jun 2008 03:55:38 +0000 (13:55 +1000)
committerIngo Molnar <mingo@elte.hu>
Mon, 23 Jun 2008 20:57:56 +0000 (22:57 +0200)
Hidehiro Kawai noticed that sched_setscheduler() can fail in
stop_machine: it calls sched_setscheduler() from insmod, which can
have CAP_SYS_MODULE without CAP_SYS_NICE.

Two cases could have failed, so are changed to sched_setscheduler_nocheck:
  kernel/softirq.c:cpu_callback()
- CPU hotplug callback
  kernel/stop_machine.c:__stop_machine_run()
- Called from various places, including modprobe()

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Cc: sugita <yumiko.sugita.yf@hitachi.com>
Cc: Satoshi OSHIMA <satoshi.oshima.fk@hitachi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/sched.h
kernel/sched.c
kernel/softirq.c
kernel/stop_machine.c

index c5d3f847ca8d05bd52ca575608c2d428a23d1b28..fe3b9b5d73905e5b6682faf1f590f4d90a402d9e 100644 (file)
@@ -1655,6 +1655,8 @@ extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern int sched_setscheduler_nocheck(struct task_struct *, int,
+                                     struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
index b048ad8a11af1a2b1929ed074bbf3f8e6e81f856..8d7c246ab864cbc35a0af29aa52da05546d83baa 100644 (file)
@@ -4746,16 +4746,8 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
        set_load_weight(p);
 }
 
-/**
- * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
- * @p: the task in question.
- * @policy: new policy.
- * @param: structure containing the new RT priority.
- *
- * NOTE that the task may be already dead.
- */
-int sched_setscheduler(struct task_struct *p, int policy,
-                      struct sched_param *param)
+static int __sched_setscheduler(struct task_struct *p, int policy,
+                               struct sched_param *param, bool user)
 {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
        unsigned long flags;
@@ -4787,7 +4779,7 @@ recheck:
        /*
         * Allow unprivileged RT tasks to decrease priority:
         */
-       if (!capable(CAP_SYS_NICE)) {
+       if (user && !capable(CAP_SYS_NICE)) {
                if (rt_policy(policy)) {
                        unsigned long rlim_rtprio;
 
@@ -4823,7 +4815,8 @@ recheck:
         * Do not allow realtime tasks into groups that have no runtime
         * assigned.
         */
-       if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+       if (user
+           && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
                return -EPERM;
 #endif
 
@@ -4872,8 +4865,39 @@ recheck:
 
        return 0;
 }
+
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * NOTE that the task may be already dead.
+ */
+int sched_setscheduler(struct task_struct *p, int policy,
+                      struct sched_param *param)
+{
+       return __sched_setscheduler(p, policy, param, true);
+}
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
+/**
+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Just like sched_setscheduler, only don't bother checking if the
+ * current context has permission.  For example, this is needed in
+ * stop_machine(): we create temporary high priority worker threads,
+ * but our caller might not have that capability.
+ */
+int sched_setscheduler_nocheck(struct task_struct *p, int policy,
+                              struct sched_param *param)
+{
+       return __sched_setscheduler(p, policy, param, false);
+}
+
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
index 36e0617400470f398700376c7fa3359d768e7e58..afd9120c2fc41b9fbf0ac2caba469eff39b95e87 100644 (file)
@@ -645,7 +645,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
-               sched_setscheduler(p, SCHED_FIFO, &param);
+               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
index b7350bbfb076293dd6d13d7ba060b3aba1f673a9..ba9b2054ecbdb99bbb868b4f9c01af0bb2ce2c30 100644 (file)
@@ -187,7 +187,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
                struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
                /* One high-prio thread per cpu.  We'll do this one. */
-               sched_setscheduler(p, SCHED_FIFO, &param);
+               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_bind(p, cpu);
                wake_up_process(p);
                wait_for_completion(&smdata.done);