sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency
[firefly-linux-kernel-4.4.55.git] / kernel/sched/fair.c
index 0b069bf3e708e82ea0e08c5b204f931ca1d22a00..ef2b104b254cb8c60d954a92e62f0f8a626ed024 100644
@@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
        account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+static void update_curr_fair(struct rq *rq)
+{
+       update_curr(cfs_rq_of(&rq->curr->se));
+}
+
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
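The new helper simply forwards to update_curr() for the cfs_rq that owns rq->curr, giving the scheduler core a way to bring a running task's sum_exec_runtime up to date without knowing about CFS internals; it is wired up as the fair_sched_class .update_curr callback at the bottom of this diff. A minimal sketch of how a core-side caller could use the hook before reporting thread CPU time (the caller below is not part of this hunk, and task_rq_lock()/task_on_rq_queued() are assumed helpers):

	unsigned long long task_sched_runtime(struct task_struct *p)
	{
		unsigned long flags;
		struct rq *rq;
		u64 ns;

		rq = task_rq_lock(p, &flags);
		/* Only a task that currently owns a CPU has unaccounted runtime. */
		if (task_current(rq, p) && task_on_rq_queued(p)) {
			update_rq_clock(rq);
			p->sched_class->update_curr(rq);	/* update_curr_fair() for CFS */
		}
		ns = p->se.sum_exec_runtime;
		task_rq_unlock(rq, p, &flags);

		return ns;
	}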
@@ -828,11 +833,12 @@ static unsigned int task_nr_scan_windows(struct task_struct *p)
 
 static unsigned int task_scan_min(struct task_struct *p)
 {
+       unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size);
        unsigned int scan, floor;
        unsigned int windows = 1;
 
-       if (sysctl_numa_balancing_scan_size < MAX_SCAN_WINDOW)
-               windows = MAX_SCAN_WINDOW / sysctl_numa_balancing_scan_size;
+       if (scan_size < MAX_SCAN_WINDOW)
+               windows = MAX_SCAN_WINDOW / scan_size;
        floor = 1000 / windows;
 
        scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p);
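sysctl_numa_balancing_scan_size can be changed through /proc/sys at any time, and the old code read it twice: once for the comparison and once for the division. If the value seen by the division is larger than the one that passed the check (in particular, larger than MAX_SCAN_WINDOW), windows ends up 0 and the "floor = 1000 / windows" line below divides by zero. Taking a single ACCESS_ONCE() snapshot makes the check and the division agree. A small userspace-style sketch of the same read-once idiom (all names below are illustrative, not kernel code):

	#define READ_ONCE_UINT(x)	(*(volatile unsigned int *)&(x))

	unsigned int scan_size_knob;		/* may be updated by another thread */

	static unsigned int scan_windows(unsigned int max_window)
	{
		/* one snapshot; every later use sees the same value */
		unsigned int scan_size = READ_ONCE_UINT(scan_size_knob);
		unsigned int windows = 1;

		if (scan_size && scan_size < max_window)
			windows = max_window / scan_size;

		return windows;			/* never 0, so a later 1000 / windows is safe */
	}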
@@ -1164,9 +1170,26 @@ static void task_numa_compare(struct task_numa_env *env,
        long moveimp = imp;
 
        rcu_read_lock();
-       cur = ACCESS_ONCE(dst_rq->curr);
-       if (cur->pid == 0) /* idle */
+
+       raw_spin_lock_irq(&dst_rq->lock);
+       cur = dst_rq->curr;
+       /*
+        * No need to move the exiting task, and this ensures that ->curr
+        * wasn't reaped and thus get_task_struct() in task_numa_assign()
+        * is safe under RCU read lock.
+        * Note that rcu_read_lock() itself can't protect from the final
+        * put_task_struct() after the last schedule().
+        */
+       if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                cur = NULL;
+       raw_spin_unlock_irq(&dst_rq->lock);
+
+       /*
+        * Because we have preemption enabled we can get migrated around and
+        * end up trying to select ourselves (current == env->p) as a swap candidate.
+        */
+       if (cur == env->p)
+               goto unlock;
 
        /*
         * "imp" is the fault differential for the source task between the
@@ -1520,7 +1543,7 @@ static void update_task_scan_period(struct task_struct *p,
                 * scanning faster if shared accesses dominate as it may
                 * simply bounce migrations uselessly
                 */
-               ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared));
+               ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared + 1));
                diff = (diff * ratio) / NUMA_PERIOD_SLOTS;
        }
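DIV_ROUND_UP(n, d) expands to ((n) + (d) - 1) / (d), so it has no protection against a zero divisor, and both fault counters can legitimately be zero when this code runs; the old expression then divides by zero. Adding 1 to the divisor keeps it positive while barely affecting the ratio for non-trivial fault counts. A standalone illustration (the private/shared values are made up; NUMA_PERIOD_SLOTS is taken as 10):

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	/* private == 0, shared == 0:
	 *   old divisor: private + shared     == 0  ->  divide error
	 *   new divisor: private + shared + 1 == 1  ->  ratio == 0
	 *
	 * private == 700, shared == 300:
	 *   old: DIV_ROUND_UP(700 * 10, 1000) == 7
	 *   new: DIV_ROUND_UP(700 * 10, 1001) == 7   (unchanged in practice)
	 */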
 
@@ -7938,6 +7961,8 @@ const struct sched_class fair_sched_class = {
 
        .get_rr_interval        = get_rr_interval_fair,
 
+       .update_curr            = update_curr_fair,
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
        .task_move_group        = task_move_group_fair,
 #endif