cpufreq: interactive: New 'interactive' governor
/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

static atomic_t active_count = ATOMIC_INIT(0);

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        int timer_idlecancel;           /* cancel timer at next idle entry */
        u64 time_in_idle;               /* cumulative idle time at sample start */
        u64 idle_exit_time;             /* wall time at sample start */
        u64 timer_run_time;             /* wall time of last timer run */
        int idling;                     /* this CPU is currently idle */
        u64 freq_change_time;           /* wall time of last speed change */
        u64 freq_change_time_in_idle;   /* cumulative idle time at last speed change */
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* Realtime kthread handles frequency scale-up; workqueue handles scale-down */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;

/* Hi speed to bump to from lo speed on a load burst (default max) */
static u64 hispeed_freq;

/* Go to hi speed when CPU load at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 95
static unsigned long go_hispeed_load;

/*
 * The minimum amount of time to spend at a frequency before we can ramp
 * down (20 ms by default).
 */
#define DEFAULT_MIN_SAMPLE_TIME (20 * USEC_PER_MSEC)
static unsigned long min_sample_time;

/*
 * The sample rate of the timer used to increase frequency (20 ms by
 * default).
 */
#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
static unsigned long timer_rate;

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};
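
/*
 * Usage note: once registered, this governor can be selected per policy
 * via sysfs, e.g. (illustrative; the path assumes a typical sysfs layout):
 *
 *   echo interactive > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */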

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int)(now_idle - time_in_idle);
        delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (delta_idle > delta_time)
                cpu_load = 0;
        else
                cpu_load = 100 * (delta_time - delta_idle) / delta_time;
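
        /*
         * Worked example (illustrative numbers): over a 20000 us window
         * of which 5000 us was idle, cpu_load =
         * 100 * (20000 - 5000) / 20000 = 75.
         */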

        delta_idle = (unsigned int)(now_idle - pcpu->freq_change_time_in_idle);
        delta_time = (unsigned int)(pcpu->timer_run_time - pcpu->freq_change_time);

        if ((delta_time == 0) || (delta_idle > delta_time))
                load_since_change = 0;
        else
                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (cpu_load >= go_hispeed_load) {
                if (pcpu->policy->cur == pcpu->policy->min)
                        new_freq = hispeed_freq;
                else
                        new_freq = pcpu->policy->max * cpu_load / 100;
        } else {
                new_freq = pcpu->policy->cur * cpu_load / 100;
        }
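
        /*
         * Example with made-up numbers: with go_hispeed_load = 95, a load
         * of 50 at cur = 600000 kHz requests 600000 * 50 / 100 = 300000
         * kHz, while a load of 96 at min speed jumps straight to
         * hispeed_freq.
         */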

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (pcpu->timer_run_time - pcpu->freq_change_time
                    < min_sample_time)
                        goto rearm;
        }

        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}
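
/*
 * Idle hooks: idle_start decides whether the per-CPU timer should keep
 * running (or be cancelled) while the CPU sleeps; idle_end restarts load
 * sampling when the CPU wakes.  Both are called from the idle notifier
 * registered in cpufreq_interactive_init().
 */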
static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer,
                                  jiffies + usecs_to_jiffies(timer_rate));
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }
}
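
/*
 * Raising speed is latency sensitive, so it is done by a dedicated
 * SCHED_FIFO kthread woken directly from the timer; lowering speed is
 * not, so it is deferred to a regular workqueue item instead.
 */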
static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        mutex_lock(&set_speed_lock);

                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        if (max_freq != pcpu->policy->cur)
                                __cpufreq_driver_target(pcpu->policy,
                                                        max_freq,
                                                        CPUFREQ_RELATION_H);
                        mutex_unlock(&set_speed_lock);

                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                }
        }

        return 0;
}

static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                unsigned int j;
                unsigned int max_freq = 0;

                pcpu = &per_cpu(cpuinfo, cpu);
                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                mutex_lock(&set_speed_lock);

                for_each_cpu(j, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, j);

                        if (pjcpu->target_freq > max_freq)
                                max_freq = pjcpu->target_freq;
                }

                if (max_freq != pcpu->policy->cur)
                        __cpufreq_driver_target(pcpu->policy, max_freq,
                                                CPUFREQ_RELATION_H);

                mutex_unlock(&set_speed_lock);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
        }
}

static ssize_t show_hispeed_freq(struct kobject *kobj,
                                 struct attribute *attr, char *buf)
{
        return sprintf(buf, "%llu\n", hispeed_freq);
}

static ssize_t store_hispeed_freq(struct kobject *kobj,
                                  struct attribute *attr, const char *buf,
                                  size_t count)
{
        int ret;
        u64 val;

        ret = strict_strtoull(buf, 0, &val);
        if (ret < 0)
                return ret;
        hispeed_freq = val;
        return count;
}

static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644,
                show_hispeed_freq, store_hispeed_freq);

static ssize_t show_go_hispeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_hispeed_load);
}

static ssize_t store_go_hispeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_hispeed_load = val;
        return count;
}

static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644,
                show_go_hispeed_load, store_go_hispeed_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static ssize_t show_timer_rate(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", timer_rate);
}

static ssize_t store_timer_rate(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        timer_rate = val;
        return count;
}

static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
                show_timer_rate, store_timer_rate);

static struct attribute *interactive_attributes[] = {
        &hispeed_freq_attr.attr,
        &go_hispeed_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};
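
/*
 * The attributes above are published under the global cpufreq kobject,
 * e.g. (illustrative; assumes a typical sysfs mount):
 *
 *   echo 90 > /sys/devices/system/cpu/cpufreq/interactive/go_hispeed_load
 *   cat /sys/devices/system/cpu/cpufreq/interactive/timer_rate
 */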
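
/*
 * Governor event handling: GOV_START wires up per-CPU state and creates
 * the sysfs group on first use, GOV_STOP tears the per-CPU state down and
 * removes the group on last use, and GOV_LIMITS clamps the current speed
 * into the new policy bounds.
 */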
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->governor_enabled = 1;
                        smp_wmb();
                }

                if (!hispeed_freq)
                        hispeed_freq = policy->max;

                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (atomic_inc_return(&active_count) > 1)
                        return 0;

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc)
                        return rc;

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                if (atomic_dec_return(&active_count) > 0)
                        return 0;

                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
        timer_rate = DEFAULT_TIMER_RATE;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("kinteractive_down", 0, 1);

        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);
        mutex_init(&set_speed_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency-sensitive workloads");
MODULE_LICENSE("GPL");