perf_counter: Allow software counters to count while task is not running
author Paul Mackerras <paulus@samba.org>
Mon, 1 Jun 2009 07:49:14 +0000 (17:49 +1000)
committer Ingo Molnar <mingo@elte.hu>
Mon, 1 Jun 2009 08:04:06 +0000 (10:04 +0200)
This changes perf_swcounter_match() so that per-task software
counters can count events that occur while their associated
task is not running.  This will allow us to use the generic
software counter code for counting task migrations, which can
occur while the task is not scheduled in.

To do this, we have to distinguish between the situations where
the counter is inactive because its task has been scheduled
out, and those where the counter is inactive because it is part
of a group that was not able to go on the PMU.  In the former
case we want the counter to count, but not in the latter case.
If the context is active, we have the latter case.  If the
context is inactive then we need to know whether the counter
was counting when the context was last active, which we can
determine by comparing its ->tstamp_stopped timestamp with the
context's timestamp.

This also folds three checks in perf_swcounter_match, checking
perf_event_raw(), perf_event_type() and perf_event_id()
individually, into a single 64-bit comparison on
counter->hw_event.config, as an optimization.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.34810.259718.955621@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/perf_counter.c

index da8dfef4b472cc80c9a7da69f5b89508a13d2747..ff8b4636f8451896e604999b53f1acc82aca0879 100644 (file)
@@ -2867,20 +2867,56 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 
 }
 
+static int perf_swcounter_is_counting(struct perf_counter *counter)
+{      /* Returns 1 if @counter should count events now, 0 otherwise. */
+       struct perf_counter_context *ctx;
+       unsigned long flags;    /* for spin_lock_irqsave()/irqrestore() */
+       int count;              /* result: 1 = count, 0 = do not count */
+
+       if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+               return 1;       /* active: counts, no lock needed */
+
+       if (counter->state != PERF_COUNTER_STATE_INACTIVE)
+               return 0;       /* neither ACTIVE nor INACTIVE: never counts */
+
+       /*
+        * If the counter is inactive, it could be just because
+        * its task is scheduled out, or because it's in a group
+        * which could not go on the PMU.  We want to count in
+        * the first case but not the second.  If the context is
+        * currently active then an inactive software counter must
+        * be the second case.  If it's not currently active then
+        * we need to know whether the counter was active when the
+        * context was last active, which we can determine by
+        * comparing counter->tstamp_stopped with ctx->time.
+        *
+        * We are within an RCU read-side critical section,
+        * which protects the existence of *ctx.
+        */
+       ctx = counter->ctx;
+       spin_lock_irqsave(&ctx->lock, flags);
+       count = 1;              /* count unless a test below says otherwise */
+       /* Re-check under ctx->lock: any test that hits means "don't count" */
+       if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+           counter->ctx->is_active ||          /* group is off the PMU */
+           counter->tstamp_stopped < ctx->time)  /* idle when ctx last active */
+               count = 0;
+       spin_unlock_irqrestore(&ctx->lock, flags);
+       return count;
+}
+
 static int perf_swcounter_match(struct perf_counter *counter,
                                enum perf_event_types type,
                                u32 event, struct pt_regs *regs)
 {
-       if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-               return 0;
+       u64 event_config;
 
-       if (perf_event_raw(&counter->hw_event))
-               return 0;
+       event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event;
 
-       if (perf_event_type(&counter->hw_event) != type)
+       if (!perf_swcounter_is_counting(counter))
                return 0;
 
-       if (perf_event_id(&counter->hw_event) != event)
+       if (counter->hw_event.config != event_config)
                return 0;
 
        if (counter->hw_event.exclude_user && user_mode(regs))