intel_pstate: add sample time scaling
authorDirk Brandewie <dirk.j.brandewie@intel.com>
Thu, 29 May 2014 16:32:24 +0000 (09:32 -0700)
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 2 Jun 2014 10:45:05 +0000 (12:45 +0200)
The PID assumes that samples are of equal time, which for a deferable
timers this is not true when the system goes idle.  This causes the
PID to take a long time to converge to the min P state and depending
on the pattern of the idle load can make the P state appear stuck.

The hold-off value of three sample times before using the scaling is
to give a grace period for applications that have high performance
requirements and spend a lot of time idle,  The poster child for this
behavior is the ffmpeg benchmark in the Phoronix test suite.

Cc: 3.14+ <stable@vger.kernel.org> # 3.14+
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/cpufreq/intel_pstate.c

index 3d57e53212d623f48dcd20a231e1ac00f86ad8f6..2a07588dcbac9578b0b6056089c66d49395111f1 100644 (file)
@@ -60,6 +60,7 @@ struct sample {
        u64 aperf;
        u64 mperf;
        int freq;
+       ktime_t time;
 };
 
 struct pstate_data {
@@ -97,6 +98,7 @@ struct cpudata {
        struct vid_data vid;
        struct _pid pid;
 
+       ktime_t last_sample_time;
        u64     prev_aperf;
        u64     prev_mperf;
        struct sample sample;
@@ -583,6 +585,8 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
        aperf = aperf >> FRAC_BITS;
        mperf = mperf >> FRAC_BITS;
 
+       cpu->last_sample_time = cpu->sample.time;
+       cpu->sample.time = ktime_get();
        cpu->sample.aperf = aperf;
        cpu->sample.mperf = mperf;
        cpu->sample.aperf -= cpu->prev_aperf;
@@ -605,12 +609,24 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 
 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 {
-       int32_t core_busy, max_pstate, current_pstate;
+       int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+       u32 duration_us;
+       u32 sample_time;
 
        core_busy = cpu->sample.core_pct_busy;
        max_pstate = int_tofp(cpu->pstate.max_pstate);
        current_pstate = int_tofp(cpu->pstate.current_pstate);
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+
+       sample_time = (pid_params.sample_rate_ms  * USEC_PER_MSEC);
+       duration_us = (u32) ktime_us_delta(cpu->sample.time,
+                                       cpu->last_sample_time);
+       if (duration_us > sample_time * 3) {
+               sample_ratio = div_fp(int_tofp(sample_time),
+                               int_tofp(duration_us));
+               core_busy = mul_fp(core_busy, sample_ratio);
+       }
+
        return core_busy;
 }