xfs: split metadata and log buffer completion to separate workqueues

[firefly-linux-kernel-4.4.55.git] / kernel / sched / cputime.c
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c

index 72fdf06ef8652d5cb443b080f53ac117bd5517ba..8394b1ee600c38ba6e9144a6326369b6ef0cdacd 100644 (file)
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -288,24 +288,29 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
         struct signal_struct *sig = tsk->signal;
         cputime_t utime, stime;
         struct task_struct *t;
-
-       times->utime = sig->utime;
-       times->stime = sig->stime;
-       times->sum_exec_runtime = sig->sum_sched_runtime;
+       unsigned int seq, nextseq;
+       unsigned long flags;
  
         rcu_read_lock();
-       /* make sure we can trust tsk->thread_group list */
-       if (!likely(pid_alive(tsk)))
-               goto out;
-
-       t = tsk;
+       /* Attempt a lockless read on the first round. */
+       nextseq = 0;
         do {
-               task_cputime(t, &utime, &stime);
-               times->utime += utime;
-               times->stime += stime;
-               times->sum_exec_runtime += task_sched_runtime(t);
-       } while_each_thread(tsk, t);
-out:
+               seq = nextseq;
+               flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+               times->utime = sig->utime;
+               times->stime = sig->stime;
+               times->sum_exec_runtime = sig->sum_sched_runtime;
+
+               for_each_thread(tsk, t) {
+                       task_cputime(t, &utime, &stime);
+                       times->utime += utime;
+                       times->stime += stime;
+                       times->sum_exec_runtime += task_sched_runtime(t);
+               }
+               /* If lockless access failed, take the lock. */
+               nextseq = 1;
+       } while (need_seqretry(&sig->stats_lock, seq));
+       done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
         rcu_read_unlock();
  }
  
@@ -549,6 +554,23 @@ drop_precision:
         return (__force cputime_t) scaled;
  }
  
+/*
+ * Atomically advance *counter to new, never moving it backwards.
+ * Interrupts, vcpu scheduling, and scaling inaccuracies can cause
+ * cputime_advance to be occasionally called with a new value smaller
+ * than counter; such calls skip the loop and leave counter unchanged.
+ *
+ * Normally a caller will only go through this loop once, or not
+ * at all in case a previous caller updated counter the same jiffy.
+ */
+static void cputime_advance(cputime_t *counter, cputime_t new)
+{
+       cputime_t old;
+
+       while (new > (old = ACCESS_ONCE(*counter)))
+               cmpxchg_cputime(counter, old, new);
+}
+
  /*
   * Adjust tick based cputime random precision against scheduler
   * runtime accounting.
@@ -594,13 +616,8 @@ static void cputime_adjust(struct task_cputime *curr,
                 utime = rtime - stime;
         }
  
-       /*
-        * If the tick based count grows faster than the scheduler one,
-        * the result of the scaling may go backward.
-        * Let's enforce monotonicity.
-        */
-       prev->stime = max(prev->stime, stime);
-       prev->utime = max(prev->utime, utime);
+       cputime_advance(&prev->stime, stime);
+       cputime_advance(&prev->utime, utime);
  
  out:
         *ut = prev->utime;
@@ -617,9 +634,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
         cputime_adjust(&cputime, &p->prev_cputime, ut, st);
  }
  
-/*
- * Must be called with siglock held.
- */
  void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
         struct task_cputime cputime;