Merge tag 'v3.10.2' into linux-linaro-lsk-android
[firefly-linux-kernel-4.4.55.git] / drivers / cpufreq / cpufreq_interactive.c
1 /*
2  * drivers/cpufreq/cpufreq_interactive.c
3  *
4  * Copyright (C) 2010 Google, Inc.
5  *
6  * This software is licensed under the terms of the GNU General Public
7  * License version 2, as published by the Free Software Foundation, and
8  * may be copied, distributed, and modified under those terms.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * Author: Mike Chan (mike@android.com)
16  *
17  */
18
19 #include <linux/cpu.h>
20 #include <linux/cpumask.h>
21 #include <linux/cpufreq.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/rwsem.h>
25 #include <linux/sched.h>
26 #include <linux/sched/rt.h>
27 #include <linux/tick.h>
28 #include <linux/time.h>
29 #include <linux/timer.h>
30 #include <linux/workqueue.h>
31 #include <linux/kthread.h>
32 #include <linux/slab.h>
33 #include "cpufreq_governor.h"
34
35 #define CREATE_TRACE_POINTS
36 #include <trace/events/cpufreq_interactive.h>
37
/*
 * Governor bookkeeping kept for each CPU: the two sampling timers, the
 * idle/busy accounting snapshots used to compute load, and the most recent
 * frequency decisions.
 */
struct cpufreq_interactive_cpuinfo {
        /* Sampling timer, re-armed tunables->timer_rate usecs ahead. */
        struct timer_list cpu_timer;
        /*
         * Armed timer_slack_val usecs after cpu_timer, and only while
         * target_freq is above policy->min.
         */
        struct timer_list cpu_slack_timer;
        spinlock_t load_lock; /* protects the next 4 fields */
        /* Idle time and timestamp from the last get_cpu_idle_time() sample. */
        u64 time_in_idle;
        u64 time_in_idle_timestamp;
        /*
         * Sum of (active time * policy->cur) accumulated since
         * cputime_speedadj_timestamp.
         */
        u64 cputime_speedadj;
        u64 cputime_speedadj_timestamp;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        /*
         * Frequency this CPU last asked for; the speedchange thread applies
         * the maximum over all CPUs of the policy.
         */
        unsigned int target_freq;
        /*
         * Speed is not lowered below floor_freq until min_sample_time has
         * elapsed since floor_validate_time.
         */
        unsigned int floor_freq;
        u64 floor_validate_time;
        /* Reference time for the above_hispeed_delay check. */
        u64 hispeed_validate_time;
        /*
         * Timer, idle and notifier paths take this with
         * down_read_trylock() and test governor_enabled while holding it.
         */
        struct rw_semaphore enable_sem;
        int governor_enabled;
};
55
/* One governor state block per CPU. */
static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* realtime thread handles frequency scaling */
static struct task_struct *speedchange_task;
/* CPUs whose target_freq changed and that the thread has not yet handled. */
static cpumask_t speedchange_cpumask;
/* Guards speedchange_cpumask. */
static spinlock_t speedchange_cpumask_lock;
/* NOTE(review): users of gov_lock are outside this excerpt. */
static struct mutex gov_lock;

/* Single-entry target_loads table used until a custom one is stored. */
#define DEFAULT_TARGET_LOAD 90
static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};

/* Default sampling period, in usecs (20 ms). */
#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
/* By default wait one sampling period before going above hispeed_freq. */
#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
static unsigned int default_above_hispeed_delay[] = {
        DEFAULT_ABOVE_HISPEED_DELAY };
71
/*
 * User-adjustable governor parameters.  A policy reaches its instance
 * through policy->governor_data.
 */
struct cpufreq_interactive_tunables {
        /* NOTE(review): presumably a reference count; users are not in view. */
        int usage_count;

        /* Hi speed to bump to from lo speed when load burst (default max) */
        unsigned int hispeed_freq;

        /* Go to hi speed when CPU load at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 99
        unsigned long go_hispeed_load;

        /*
         * Target load. Lower values result in higher CPU speeds.
         *
         * The array alternates load and frequency threshold:
         * load0, freq1, load1, freq2, load2, ...  A load applies from the
         * threshold preceding it up to the next one; load0 applies below
         * freq1.  ntarget_loads is therefore odd.
         */
        spinlock_t target_loads_lock;
        unsigned int *target_loads;
        int ntarget_loads;

        /*
         * The minimum amount of time to spend at a frequency before we can ramp
         * down.
         */
#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
        unsigned long min_sample_time;

        /*
         * The sample rate of the timer used to increase frequency
         * (in usecs; converted with usecs_to_jiffies() when arming).
         */
        unsigned long timer_rate;

        /*
         * Wait this long before raising speed above hispeed, by default a
         * single timer interval.  Same alternating layout as target_loads:
         * delay0, freq1, delay1, ...
         */
        spinlock_t above_hispeed_delay_lock;
        unsigned int *above_hispeed_delay;
        int nabove_hispeed_delay;

        /* Non-zero means indefinite speed boost active */
        int boost_val;
        /* Duration of a boost pulse in usecs */
        int boostpulse_duration_val;
        /* End time of boost pulse in ktime converted to usecs */
        u64 boostpulse_endtime;

        /*
         * Max additional time to wait in idle, beyond timer_rate, at speeds
         * above minimum before wakeup to reduce speed, or -1 if unnecessary.
         */
#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
        int timer_slack_val;

        /* Passed straight through to get_cpu_idle_time(). */
        bool io_is_busy;
};
123
/*
 * For cases where we have single governor instance for system.
 * NOTE(review): not declared static; confirm whether any other translation
 * unit actually references this symbol.
 */
struct cpufreq_interactive_tunables *common_tunables;

/* Defined later in the file; declared here so earlier code can call it. */
static struct attribute_group *get_sysfs_attr(void);
128
129 static void cpufreq_interactive_timer_resched(
130         struct cpufreq_interactive_cpuinfo *pcpu)
131 {
132         struct cpufreq_interactive_tunables *tunables =
133                 pcpu->policy->governor_data;
134         unsigned long expires;
135         unsigned long flags;
136
137         spin_lock_irqsave(&pcpu->load_lock, flags);
138         pcpu->time_in_idle =
139                 get_cpu_idle_time(smp_processor_id(),
140                                      &pcpu->time_in_idle_timestamp,
141                                      tunables->io_is_busy);
142         pcpu->cputime_speedadj = 0;
143         pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
144         expires = jiffies + usecs_to_jiffies(tunables->timer_rate);
145         mod_timer_pinned(&pcpu->cpu_timer, expires);
146
147         if (tunables->timer_slack_val >= 0 &&
148                         pcpu->target_freq > pcpu->policy->min) {
149                 expires += usecs_to_jiffies(tunables->timer_slack_val);
150                 mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
151         }
152
153         spin_unlock_irqrestore(&pcpu->load_lock, flags);
154 }
155
156 /* The caller shall take enable_sem write semaphore to avoid any timer race.
157  * The cpu_timer and cpu_slack_timer must be deactivated when calling this
158  * function.
159  */
160 static void cpufreq_interactive_timer_start(
161                 struct cpufreq_interactive_tunables *tunables, int cpu)
162 {
163         struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
164         unsigned long expires = jiffies + usecs_to_jiffies(tunables->timer_rate);
165         unsigned long flags;
166
167         pcpu->cpu_timer.expires = expires;
168         add_timer_on(&pcpu->cpu_timer, cpu);
169         if (tunables->timer_slack_val >= 0 &&
170                         pcpu->target_freq > pcpu->policy->min) {
171                 expires += usecs_to_jiffies(tunables->timer_slack_val);
172                 pcpu->cpu_slack_timer.expires = expires;
173                 add_timer_on(&pcpu->cpu_slack_timer, cpu);
174         }
175
176         spin_lock_irqsave(&pcpu->load_lock, flags);
177         pcpu->time_in_idle =
178                 get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp,
179                                   tunables->io_is_busy);
180         pcpu->cputime_speedadj = 0;
181         pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
182         spin_unlock_irqrestore(&pcpu->load_lock, flags);
183 }
184
185 static unsigned int freq_to_above_hispeed_delay(
186                 struct cpufreq_interactive_tunables *tunables,
187                 unsigned int freq)
188 {
189         int i;
190         unsigned int ret;
191         unsigned long flags;
192
193         spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
194
195         for (i = 0; i < tunables->nabove_hispeed_delay - 1 &&
196                         freq >= tunables->above_hispeed_delay[i+1]; i += 2)
197                 ;
198
199         ret = tunables->above_hispeed_delay[i];
200         spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
201         return ret;
202 }
203
204 static unsigned int freq_to_targetload(struct cpufreq_interactive_tunables
205                 *tunables, unsigned int freq)
206 {
207         int i;
208         unsigned int ret;
209         unsigned long flags;
210
211         spin_lock_irqsave(&tunables->target_loads_lock, flags);
212
213         for (i = 0; i < tunables->ntarget_loads - 1 &&
214                         freq >= tunables->target_loads[i+1]; i += 2)
215                 ;
216
217         ret = tunables->target_loads[i];
218         spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
219         return ret;
220 }
221
222 /*
223  * If increasing frequencies never map to a lower target load then
224  * choose_freq() will find the minimum frequency that does not exceed its
225  * target load given the current load.
226  */
227 static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu,
228                 unsigned int loadadjfreq)
229 {
230         unsigned int freq = pcpu->policy->cur;
231         unsigned int prevfreq, freqmin, freqmax;
232         unsigned int tl;
233         int index;
234
235         freqmin = 0;
236         freqmax = UINT_MAX;
237
238         do {
239                 prevfreq = freq;
240                 tl = freq_to_targetload(pcpu->policy->governor_data, freq);
241
242                 /*
243                  * Find the lowest frequency where the computed load is less
244                  * than or equal to the target load.
245                  */
246
247                 if (cpufreq_frequency_table_target(
248                             pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
249                             CPUFREQ_RELATION_L, &index))
250                         break;
251                 freq = pcpu->freq_table[index].frequency;
252
253                 if (freq > prevfreq) {
254                         /* The previous frequency is too low. */
255                         freqmin = prevfreq;
256
257                         if (freq >= freqmax) {
258                                 /*
259                                  * Find the highest frequency that is less
260                                  * than freqmax.
261                                  */
262                                 if (cpufreq_frequency_table_target(
263                                             pcpu->policy, pcpu->freq_table,
264                                             freqmax - 1, CPUFREQ_RELATION_H,
265                                             &index))
266                                         break;
267                                 freq = pcpu->freq_table[index].frequency;
268
269                                 if (freq == freqmin) {
270                                         /*
271                                          * The first frequency below freqmax
272                                          * has already been found to be too
273                                          * low.  freqmax is the lowest speed
274                                          * we found that is fast enough.
275                                          */
276                                         freq = freqmax;
277                                         break;
278                                 }
279                         }
280                 } else if (freq < prevfreq) {
281                         /* The previous frequency is high enough. */
282                         freqmax = prevfreq;
283
284                         if (freq <= freqmin) {
285                                 /*
286                                  * Find the lowest frequency that is higher
287                                  * than freqmin.
288                                  */
289                                 if (cpufreq_frequency_table_target(
290                                             pcpu->policy, pcpu->freq_table,
291                                             freqmin + 1, CPUFREQ_RELATION_L,
292                                             &index))
293                                         break;
294                                 freq = pcpu->freq_table[index].frequency;
295
296                                 /*
297                                  * If freqmax is the first frequency above
298                                  * freqmin then we have already found that
299                                  * this speed is fast enough.
300                                  */
301                                 if (freq == freqmax)
302                                         break;
303                         }
304                 }
305
306                 /* If same frequency chosen as previous then done. */
307         } while (freq != prevfreq);
308
309         return freq;
310 }
311
312 static u64 update_load(int cpu)
313 {
314         struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
315         struct cpufreq_interactive_tunables *tunables =
316                 pcpu->policy->governor_data;
317         u64 now;
318         u64 now_idle;
319         u64 delta_idle;
320         u64 delta_time;
321         u64 active_time;
322
323         now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
324         delta_idle = (now_idle - pcpu->time_in_idle);
325         delta_time = (now - pcpu->time_in_idle_timestamp);
326
327         if (delta_time <= delta_idle)
328                 active_time = 0;
329         else
330                 active_time = delta_time - delta_idle;
331
332         pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
333
334         pcpu->time_in_idle = now_idle;
335         pcpu->time_in_idle_timestamp = now;
336         return now;
337 }
338
/*
 * Sample the load of one CPU and decide its next target frequency.
 *
 * @data is the CPU number.  The function is called directly from the
 * idle-end path; judging by its signature it is also the timer callback
 * (timer setup is not visible in this excerpt).
 *
 * The function does nothing if enable_sem cannot be read-locked or the
 * governor is disabled for this CPU.  Otherwise it computes the load over the
 * current accounting window, picks a new frequency, applies the
 * above_hispeed_delay and min_sample_time hold-offs, records the new
 * target_freq and wakes the speedchange thread to apply it.  The timer is
 * re-armed unless the CPU is already targeting policy->max.
 */
static void cpufreq_interactive_timer(unsigned long data)
{
        u64 now;
        unsigned int delta_time;
        u64 cputime_speedadj;
        int cpu_load;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        struct cpufreq_interactive_tunables *tunables =
                pcpu->policy->governor_data;
        unsigned int new_freq;
        unsigned int loadadjfreq;
        unsigned int index;
        unsigned long flags;
        bool boosted;

        if (!down_read_trylock(&pcpu->enable_sem))
                return;
        if (!pcpu->governor_enabled)
                goto exit;

        /* Close the accounting window and copy out the accumulated load. */
        spin_lock_irqsave(&pcpu->load_lock, flags);
        now = update_load(data);
        delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
        cputime_speedadj = pcpu->cputime_speedadj;
        spin_unlock_irqrestore(&pcpu->load_lock, flags);

        /* A zero-length window would divide by zero below. */
        if (WARN_ON_ONCE(!delta_time))
                goto rearm;

        /*
         * cputime_speedadj / delta_time is the busy-time-weighted average
         * frequency over the window; times 100 it becomes loadadjfreq, and
         * dividing that by target_freq gives the load as a percentage of
         * the current target.
         */
        do_div(cputime_speedadj, delta_time);
        loadadjfreq = (unsigned int)cputime_speedadj * 100;
        cpu_load = loadadjfreq / pcpu->target_freq;
        boosted = tunables->boost_val || now < tunables->boostpulse_endtime;

        /*
         * At or above go_hispeed_load, or while boosted, jump to hispeed_freq
         * first; once already there, never choose anything lower than it.
         */
        if (cpu_load >= tunables->go_hispeed_load || boosted) {
                if (pcpu->target_freq < tunables->hispeed_freq) {
                        new_freq = tunables->hispeed_freq;
                } else {
                        new_freq = choose_freq(pcpu, loadadjfreq);

                        if (new_freq < tunables->hispeed_freq)
                                new_freq = tunables->hispeed_freq;
                }
        } else {
                new_freq = choose_freq(pcpu, loadadjfreq);
        }

        /*
         * Going higher while already at or above hispeed_freq is deferred
         * until above_hispeed_delay has passed since hispeed_validate_time.
         */
        if (pcpu->target_freq >= tunables->hispeed_freq &&
            new_freq > pcpu->target_freq &&
            now - pcpu->hispeed_validate_time <
            freq_to_above_hispeed_delay(tunables, pcpu->target_freq)) {
                trace_cpufreq_interactive_notyet(
                        data, cpu_load, pcpu->target_freq,
                        pcpu->policy->cur, new_freq);
                goto rearm;
        }

        pcpu->hispeed_validate_time = now;

        /* Snap the request to a frequency that exists in the table. */
        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_L,
                                           &index))
                goto rearm;

        new_freq = pcpu->freq_table[index].frequency;

        /*
         * Do not scale below floor_freq unless we have been at or above the
         * floor frequency for the minimum sample time since last validated.
         */
        if (new_freq < pcpu->floor_freq) {
                if (now - pcpu->floor_validate_time <
                                tunables->min_sample_time) {
                        trace_cpufreq_interactive_notyet(
                                data, cpu_load, pcpu->target_freq,
                                pcpu->policy->cur, new_freq);
                        goto rearm;
                }
        }

        /*
         * Update the timestamp for checking whether speed has been held at
         * or above the selected frequency for a minimum of min_sample_time,
         * if not boosted to hispeed_freq.  If boosted to hispeed_freq then we
         * allow the speed to drop as soon as the boostpulse duration expires
         * (or the indefinite boost is turned off).
         */

        if (!boosted || new_freq > tunables->hispeed_freq) {
                pcpu->floor_freq = new_freq;
                pcpu->floor_validate_time = now;
        }

        if (pcpu->target_freq == new_freq) {
                trace_cpufreq_interactive_already(
                        data, cpu_load, pcpu->target_freq,
                        pcpu->policy->cur, new_freq);
                goto rearm_if_notmax;
        }

        trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
                                         pcpu->policy->cur, new_freq);

        /*
         * Record the decision and hand the actual frequency change to the
         * speedchange thread.
         */
        pcpu->target_freq = new_freq;
        spin_lock_irqsave(&speedchange_cpumask_lock, flags);
        cpumask_set_cpu(data, &speedchange_cpumask);
        spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
        wake_up_process(speedchange_task);

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer))
                cpufreq_interactive_timer_resched(pcpu);

exit:
        up_read(&pcpu->enable_sem);
        return;
}
465
466 static void cpufreq_interactive_idle_start(void)
467 {
468         struct cpufreq_interactive_cpuinfo *pcpu =
469                 &per_cpu(cpuinfo, smp_processor_id());
470         int pending;
471
472         if (!down_read_trylock(&pcpu->enable_sem))
473                 return;
474         if (!pcpu->governor_enabled) {
475                 up_read(&pcpu->enable_sem);
476                 return;
477         }
478
479         pending = timer_pending(&pcpu->cpu_timer);
480
481         if (pcpu->target_freq != pcpu->policy->min) {
482                 /*
483                  * Entering idle while not at lowest speed.  On some
484                  * platforms this can hold the other CPU(s) at that speed
485                  * even though the CPU is idle. Set a timer to re-evaluate
486                  * speed so this idle CPU doesn't hold the other CPUs above
487                  * min indefinitely.  This should probably be a quirk of
488                  * the CPUFreq driver.
489                  */
490                 if (!pending)
491                         cpufreq_interactive_timer_resched(pcpu);
492         }
493
494         up_read(&pcpu->enable_sem);
495 }
496
497 static void cpufreq_interactive_idle_end(void)
498 {
499         struct cpufreq_interactive_cpuinfo *pcpu =
500                 &per_cpu(cpuinfo, smp_processor_id());
501
502         if (!down_read_trylock(&pcpu->enable_sem))
503                 return;
504         if (!pcpu->governor_enabled) {
505                 up_read(&pcpu->enable_sem);
506                 return;
507         }
508
509         /* Arm the timer for 1-2 ticks later if not already. */
510         if (!timer_pending(&pcpu->cpu_timer)) {
511                 cpufreq_interactive_timer_resched(pcpu);
512         } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
513                 del_timer(&pcpu->cpu_timer);
514                 del_timer(&pcpu->cpu_slack_timer);
515                 cpufreq_interactive_timer(smp_processor_id());
516         }
517
518         up_read(&pcpu->enable_sem);
519 }
520
521 static int cpufreq_interactive_speedchange_task(void *data)
522 {
523         unsigned int cpu;
524         cpumask_t tmp_mask;
525         unsigned long flags;
526         struct cpufreq_interactive_cpuinfo *pcpu;
527
528         while (1) {
529                 set_current_state(TASK_INTERRUPTIBLE);
530                 spin_lock_irqsave(&speedchange_cpumask_lock, flags);
531
532                 if (cpumask_empty(&speedchange_cpumask)) {
533                         spin_unlock_irqrestore(&speedchange_cpumask_lock,
534                                                flags);
535                         schedule();
536
537                         if (kthread_should_stop())
538                                 break;
539
540                         spin_lock_irqsave(&speedchange_cpumask_lock, flags);
541                 }
542
543                 set_current_state(TASK_RUNNING);
544                 tmp_mask = speedchange_cpumask;
545                 cpumask_clear(&speedchange_cpumask);
546                 spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
547
548                 for_each_cpu(cpu, &tmp_mask) {
549                         unsigned int j;
550                         unsigned int max_freq = 0;
551
552                         pcpu = &per_cpu(cpuinfo, cpu);
553                         if (!down_read_trylock(&pcpu->enable_sem))
554                                 continue;
555                         if (!pcpu->governor_enabled) {
556                                 up_read(&pcpu->enable_sem);
557                                 continue;
558                         }
559
560                         for_each_cpu(j, pcpu->policy->cpus) {
561                                 struct cpufreq_interactive_cpuinfo *pjcpu =
562                                         &per_cpu(cpuinfo, j);
563
564                                 if (pjcpu->target_freq > max_freq)
565                                         max_freq = pjcpu->target_freq;
566                         }
567
568                         if (max_freq != pcpu->policy->cur)
569                                 __cpufreq_driver_target(pcpu->policy,
570                                                         max_freq,
571                                                         CPUFREQ_RELATION_H);
572                         trace_cpufreq_interactive_setspeed(cpu,
573                                                      pcpu->target_freq,
574                                                      pcpu->policy->cur);
575
576                         up_read(&pcpu->enable_sem);
577                 }
578         }
579
580         return 0;
581 }
582
583 static void cpufreq_interactive_boost(void)
584 {
585         int i;
586         int anyboost = 0;
587         unsigned long flags;
588         struct cpufreq_interactive_cpuinfo *pcpu;
589         struct cpufreq_interactive_tunables *tunables;
590
591         spin_lock_irqsave(&speedchange_cpumask_lock, flags);
592
593         for_each_online_cpu(i) {
594                 pcpu = &per_cpu(cpuinfo, i);
595                 tunables = pcpu->policy->governor_data;
596
597                 if (pcpu->target_freq < tunables->hispeed_freq) {
598                         pcpu->target_freq = tunables->hispeed_freq;
599                         cpumask_set_cpu(i, &speedchange_cpumask);
600                         pcpu->hispeed_validate_time =
601                                 ktime_to_us(ktime_get());
602                         anyboost = 1;
603                 }
604
605                 /*
606                  * Set floor freq and (re)start timer for when last
607                  * validated.
608                  */
609
610                 pcpu->floor_freq = tunables->hispeed_freq;
611                 pcpu->floor_validate_time = ktime_to_us(ktime_get());
612         }
613
614         spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
615
616         if (anyboost)
617                 wake_up_process(speedchange_task);
618 }
619
620 static int cpufreq_interactive_notifier(
621         struct notifier_block *nb, unsigned long val, void *data)
622 {
623         struct cpufreq_freqs *freq = data;
624         struct cpufreq_interactive_cpuinfo *pcpu;
625         int cpu;
626         unsigned long flags;
627
628         if (val == CPUFREQ_POSTCHANGE) {
629                 pcpu = &per_cpu(cpuinfo, freq->cpu);
630                 if (!down_read_trylock(&pcpu->enable_sem))
631                         return 0;
632                 if (!pcpu->governor_enabled) {
633                         up_read(&pcpu->enable_sem);
634                         return 0;
635                 }
636
637                 for_each_cpu(cpu, pcpu->policy->cpus) {
638                         struct cpufreq_interactive_cpuinfo *pjcpu =
639                                 &per_cpu(cpuinfo, cpu);
640                         if (cpu != freq->cpu) {
641                                 if (!down_read_trylock(&pjcpu->enable_sem))
642                                         continue;
643                                 if (!pjcpu->governor_enabled) {
644                                         up_read(&pjcpu->enable_sem);
645                                         continue;
646                                 }
647                         }
648                         spin_lock_irqsave(&pjcpu->load_lock, flags);
649                         update_load(cpu);
650                         spin_unlock_irqrestore(&pjcpu->load_lock, flags);
651                         if (cpu != freq->cpu)
652                                 up_read(&pjcpu->enable_sem);
653                 }
654
655                 up_read(&pcpu->enable_sem);
656         }
657         return 0;
658 }
659
/*
 * Notifier block wrapping cpufreq_interactive_notifier().
 * NOTE(review): its registration is outside this excerpt.
 */
static struct notifier_block cpufreq_notifier_block = {
        .notifier_call = cpufreq_interactive_notifier,
};
663
664 static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
665 {
666         const char *cp;
667         int i;
668         int ntokens = 1;
669         unsigned int *tokenized_data;
670         int err = -EINVAL;
671
672         cp = buf;
673         while ((cp = strpbrk(cp + 1, " :")))
674                 ntokens++;
675
676         if (!(ntokens & 0x1))
677                 goto err;
678
679         tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
680         if (!tokenized_data) {
681                 err = -ENOMEM;
682                 goto err;
683         }
684
685         cp = buf;
686         i = 0;
687         while (i < ntokens) {
688                 if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
689                         goto err_kfree;
690
691                 cp = strpbrk(cp, " :");
692                 if (!cp)
693                         break;
694                 cp++;
695         }
696
697         if (i != ntokens)
698                 goto err_kfree;
699
700         *num_tokens = ntokens;
701         return tokenized_data;
702
703 err_kfree:
704         kfree(tokenized_data);
705 err:
706         return ERR_PTR(err);
707 }
708
709 static ssize_t show_target_loads(struct cpufreq_interactive_tunables *tunables,
710                 char *buf)
711 {
712         int i;
713         ssize_t ret = 0;
714         unsigned long flags;
715
716         spin_lock_irqsave(&tunables->target_loads_lock, flags);
717
718         for (i = 0; i < tunables->ntarget_loads; i++)
719                 ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i],
720                                i & 0x1 ? ":" : " ");
721
722         ret += sprintf(buf + ret, "\n");
723         spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
724         return ret;
725 }
726
727 static ssize_t store_target_loads(struct cpufreq_interactive_tunables *tunables,
728                 const char *buf, size_t count)
729 {
730         int ntokens;
731         unsigned int *new_target_loads = NULL;
732         unsigned long flags;
733
734         new_target_loads = get_tokenized_data(buf, &ntokens);
735         if (IS_ERR(new_target_loads))
736                 return PTR_RET(new_target_loads);
737
738         spin_lock_irqsave(&tunables->target_loads_lock, flags);
739         if (tunables->target_loads != default_target_loads)
740                 kfree(tunables->target_loads);
741         tunables->target_loads = new_target_loads;
742         tunables->ntarget_loads = ntokens;
743         spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
744         return count;
745 }
746
747 static ssize_t show_above_hispeed_delay(struct cpufreq_interactive_tunables
748                 *tunables, char *buf)
749 {
750         int i;
751         ssize_t ret = 0;
752         unsigned long flags;
753
754         spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
755
756         for (i = 0; i < tunables->nabove_hispeed_delay; i++)
757                 ret += sprintf(buf + ret, "%u%s",
758                                tunables->above_hispeed_delay[i],
759                                i & 0x1 ? ":" : " ");
760
761         ret += sprintf(buf + ret, "\n");
762         spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
763         return ret;
764 }
765
766 static ssize_t store_above_hispeed_delay(struct cpufreq_interactive_tunables
767                 *tunables, const char *buf, size_t count)
768 {
769         int ntokens;
770         unsigned int *new_above_hispeed_delay = NULL;
771         unsigned long flags;
772
773         new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
774         if (IS_ERR(new_above_hispeed_delay))
775                 return PTR_RET(new_above_hispeed_delay);
776
777         spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
778         if (tunables->above_hispeed_delay != default_above_hispeed_delay)
779                 kfree(tunables->above_hispeed_delay);
780         tunables->above_hispeed_delay = new_above_hispeed_delay;
781         tunables->nabove_hispeed_delay = ntokens;
782         spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
783         return count;
784
785 }
786
787 static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
788                 char *buf)
789 {
790         return sprintf(buf, "%u\n", tunables->hispeed_freq);
791 }
792
793 static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
794                 const char *buf, size_t count)
795 {
796         int ret;
797         long unsigned int val;
798
799         ret = strict_strtoul(buf, 0, &val);
800         if (ret < 0)
801                 return ret;
802         tunables->hispeed_freq = val;
803         return count;
804 }
805
806 static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables
807                 *tunables, char *buf)
808 {
809         return sprintf(buf, "%lu\n", tunables->go_hispeed_load);
810 }
811
812 static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables
813                 *tunables, const char *buf, size_t count)
814 {
815         int ret;
816         unsigned long val;
817
818         ret = strict_strtoul(buf, 0, &val);
819         if (ret < 0)
820                 return ret;
821         tunables->go_hispeed_load = val;
822         return count;
823 }
824
825 static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables
826                 *tunables, char *buf)
827 {
828         return sprintf(buf, "%lu\n", tunables->min_sample_time);
829 }
830
831 static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables
832                 *tunables, const char *buf, size_t count)
833 {
834         int ret;
835         unsigned long val;
836
837         ret = strict_strtoul(buf, 0, &val);
838         if (ret < 0)
839                 return ret;
840         tunables->min_sample_time = val;
841         return count;
842 }
843
844 static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables,
845                 char *buf)
846 {
847         return sprintf(buf, "%lu\n", tunables->timer_rate);
848 }
849
850 static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables,
851                 const char *buf, size_t count)
852 {
853         int ret;
854         unsigned long val;
855
856         ret = strict_strtoul(buf, 0, &val);
857         if (ret < 0)
858                 return ret;
859         tunables->timer_rate = val;
860         return count;
861 }
862
863 static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables,
864                 char *buf)
865 {
866         return sprintf(buf, "%d\n", tunables->timer_slack_val);
867 }
868
869 static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables,
870                 const char *buf, size_t count)
871 {
872         int ret;
873         unsigned long val;
874
875         ret = kstrtol(buf, 10, &val);
876         if (ret < 0)
877                 return ret;
878
879         tunables->timer_slack_val = val;
880         return count;
881 }
882
883 static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables,
884                           char *buf)
885 {
886         return sprintf(buf, "%d\n", tunables->boost_val);
887 }
888
889 static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables,
890                            const char *buf, size_t count)
891 {
892         int ret;
893         unsigned long val;
894
895         ret = kstrtoul(buf, 0, &val);
896         if (ret < 0)
897                 return ret;
898
899         tunables->boost_val = val;
900
901         if (tunables->boost_val) {
902                 trace_cpufreq_interactive_boost("on");
903                 cpufreq_interactive_boost();
904         } else {
905                 trace_cpufreq_interactive_unboost("off");
906         }
907
908         return count;
909 }
910
911 static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables,
912                                 const char *buf, size_t count)
913 {
914         int ret;
915         unsigned long val;
916
917         ret = kstrtoul(buf, 0, &val);
918         if (ret < 0)
919                 return ret;
920
921         tunables->boostpulse_endtime = ktime_to_us(ktime_get()) +
922                 tunables->boostpulse_duration_val;
923         trace_cpufreq_interactive_boost("pulse");
924         cpufreq_interactive_boost();
925         return count;
926 }
927
928 static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables
929                 *tunables, char *buf)
930 {
931         return sprintf(buf, "%d\n", tunables->boostpulse_duration_val);
932 }
933
934 static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables
935                 *tunables, const char *buf, size_t count)
936 {
937         int ret;
938         unsigned long val;
939
940         ret = kstrtoul(buf, 0, &val);
941         if (ret < 0)
942                 return ret;
943
944         tunables->boostpulse_duration_val = val;
945         return count;
946 }
947
948 static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables,
949                 char *buf)
950 {
951         return sprintf(buf, "%u\n", tunables->io_is_busy);
952 }
953
954 static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables,
955                 const char *buf, size_t count)
956 {
957         int ret;
958         unsigned long val;
959
960         ret = kstrtoul(buf, 0, &val);
961         if (ret < 0)
962                 return ret;
963         tunables->io_is_busy = val;
964         return count;
965 }
966
/*
 * Create show/store routines
 * - sys: One governor instance for complete SYSTEM
 * - pol: One governor instance per struct cpufreq_policy
 *
 * Each macro below generates two thin sysfs wrappers around the
 * show_<file_name>() / store_<file_name>() handlers defined above:
 *
 *   <op>_<file_name>_gov_sys  takes (kobj, attr, buf[, count]), ignores
 *                             kobj/attr and passes common_tunables;
 *   <op>_<file_name>_gov_pol  takes (policy, buf[, count]) and passes
 *                             policy->governor_data.
 *
 * show_store_gov_pol_sys() simply expands to both the show and the store
 * pair for the same file_name.
 */
#define show_gov_pol_sys(file_name)                                     \
static ssize_t show_##file_name##_gov_sys                               \
(struct kobject *kobj, struct attribute *attr, char *buf)               \
{                                                                       \
        return show_##file_name(common_tunables, buf);                  \
}                                                                       \
                                                                        \
static ssize_t show_##file_name##_gov_pol                               \
(struct cpufreq_policy *policy, char *buf)                              \
{                                                                       \
        return show_##file_name(policy->governor_data, buf);            \
}

#define store_gov_pol_sys(file_name)                                    \
static ssize_t store_##file_name##_gov_sys                              \
(struct kobject *kobj, struct attribute *attr, const char *buf,         \
        size_t count)                                                   \
{                                                                       \
        return store_##file_name(common_tunables, buf, count);          \
}                                                                       \
                                                                        \
static ssize_t store_##file_name##_gov_pol                              \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        return store_##file_name(policy->governor_data, buf, count);    \
}

#define show_store_gov_pol_sys(file_name)                               \
show_gov_pol_sys(file_name);                                            \
store_gov_pol_sys(file_name)
1002
/*
 * Instantiate the _gov_sys/_gov_pol wrappers for every tunable.
 * boostpulse only gets store wrappers: it is generated with
 * store_gov_pol_sys() alone, so no show wrapper exists for it.
 */
show_store_gov_pol_sys(target_loads);
show_store_gov_pol_sys(above_hispeed_delay);
show_store_gov_pol_sys(hispeed_freq);
show_store_gov_pol_sys(go_hispeed_load);
show_store_gov_pol_sys(min_sample_time);
show_store_gov_pol_sys(timer_rate);
show_store_gov_pol_sys(timer_slack);
show_store_gov_pol_sys(boost);
store_gov_pol_sys(boostpulse);
show_store_gov_pol_sys(boostpulse_duration);
show_store_gov_pol_sys(io_is_busy);
1014
/*
 * Declare the read/write sysfs attribute objects for each tunable.
 * NOTE(review): gov_sys_pol_attr_rw() is defined outside this file
 * (presumably cpufreq_governor.h); judging by the attribute tables below it
 * produces <name>_gov_sys and <name>_gov_pol objects bound to the wrappers
 * instantiated above - confirm against its definition.
 */
gov_sys_pol_attr_rw(target_loads);
gov_sys_pol_attr_rw(above_hispeed_delay);
gov_sys_pol_attr_rw(hispeed_freq);
gov_sys_pol_attr_rw(go_hispeed_load);
gov_sys_pol_attr_rw(min_sample_time);
gov_sys_pol_attr_rw(timer_rate);
gov_sys_pol_attr_rw(timer_slack);
gov_sys_pol_attr_rw(boost);
gov_sys_pol_attr_rw(boostpulse_duration);
gov_sys_pol_attr_rw(io_is_busy);
1025
/*
 * System-wide "boostpulse" attribute: mode 0200 with no show handler, so it
 * is write-only and is declared by hand instead of via gov_sys_pol_attr_rw().
 */
static struct global_attr boostpulse_gov_sys =
        __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys);
1028
/*
 * Per-policy "boostpulse" attribute: mode 0200 with no show handler, so it
 * is write-only and is declared by hand instead of via gov_sys_pol_attr_rw().
 */
static struct freq_attr boostpulse_gov_pol =
        __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol);
1031
/*
 * One Governor instance for entire system.
 * NULL-terminated list of the _gov_sys attributes exposed through
 * interactive_attr_group_gov_sys.
 */
static struct attribute *interactive_attributes_gov_sys[] = {
        &target_loads_gov_sys.attr,
        &above_hispeed_delay_gov_sys.attr,
        &hispeed_freq_gov_sys.attr,
        &go_hispeed_load_gov_sys.attr,
        &min_sample_time_gov_sys.attr,
        &timer_rate_gov_sys.attr,
        &timer_slack_gov_sys.attr,
        &boost_gov_sys.attr,
        &boostpulse_gov_sys.attr,
        &boostpulse_duration_gov_sys.attr,
        &io_is_busy_gov_sys.attr,
        NULL,
};
1047
/*
 * Attribute group named "interactive" holding the system-wide attributes;
 * returned by get_sysfs_attr() when the governor is not per-policy.
 */
static struct attribute_group interactive_attr_group_gov_sys = {
        .attrs = interactive_attributes_gov_sys,
        .name = "interactive",
};
1052
/*
 * Per policy governor instance.
 * NULL-terminated list of the _gov_pol attributes exposed through
 * interactive_attr_group_gov_pol.
 */
static struct attribute *interactive_attributes_gov_pol[] = {
        &target_loads_gov_pol.attr,
        &above_hispeed_delay_gov_pol.attr,
        &hispeed_freq_gov_pol.attr,
        &go_hispeed_load_gov_pol.attr,
        &min_sample_time_gov_pol.attr,
        &timer_rate_gov_pol.attr,
        &timer_slack_gov_pol.attr,
        &boost_gov_pol.attr,
        &boostpulse_gov_pol.attr,
        &boostpulse_duration_gov_pol.attr,
        &io_is_busy_gov_pol.attr,
        NULL,
};
1068
/*
 * Attribute group named "interactive" holding the per-policy attributes;
 * returned by get_sysfs_attr() when the governor is per-policy.
 */
static struct attribute_group interactive_attr_group_gov_pol = {
        .attrs = interactive_attributes_gov_pol,
        .name = "interactive",
};
1073
1074 static struct attribute_group *get_sysfs_attr(void)
1075 {
1076         if (have_governor_per_policy())
1077                 return &interactive_attr_group_gov_pol;
1078         else
1079                 return &interactive_attr_group_gov_sys;
1080 }
1081
1082 static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
1083                                              unsigned long val,
1084                                              void *data)
1085 {
1086         switch (val) {
1087         case IDLE_START:
1088                 cpufreq_interactive_idle_start();
1089                 break;
1090         case IDLE_END:
1091                 cpufreq_interactive_idle_end();
1092                 break;
1093         }
1094
1095         return 0;
1096 }
1097
/*
 * Notifier block registered with idle_notifier_register() on
 * CPUFREQ_GOV_POLICY_INIT and unregistered on CPUFREQ_GOV_POLICY_EXIT.
 */
static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};
1101
1102 static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
1103                 unsigned int event)
1104 {
1105         int rc;
1106         unsigned int j;
1107         struct cpufreq_interactive_cpuinfo *pcpu;
1108         struct cpufreq_frequency_table *freq_table;
1109         struct cpufreq_interactive_tunables *tunables;
1110
1111         if (have_governor_per_policy())
1112                 tunables = policy->governor_data;
1113         else
1114                 tunables = common_tunables;
1115
1116         WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT));
1117
1118         switch (event) {
1119         case CPUFREQ_GOV_POLICY_INIT:
1120                 if (have_governor_per_policy()) {
1121                         WARN_ON(tunables);
1122                 } else if (tunables) {
1123                         tunables->usage_count++;
1124                         policy->governor_data = tunables;
1125                         return 0;
1126                 }
1127
1128                 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
1129                 if (!tunables) {
1130                         pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
1131                         return -ENOMEM;
1132                 }
1133
1134                 rc = sysfs_create_group(get_governor_parent_kobj(policy),
1135                                 get_sysfs_attr());
1136                 if (rc) {
1137                         kfree(tunables);
1138                         return rc;
1139                 }
1140
1141                 tunables->usage_count = 1;
1142                 tunables->above_hispeed_delay = default_above_hispeed_delay;
1143                 tunables->nabove_hispeed_delay =
1144                         ARRAY_SIZE(default_above_hispeed_delay);
1145                 tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
1146                 tunables->target_loads = default_target_loads;
1147                 tunables->ntarget_loads = ARRAY_SIZE(default_target_loads);
1148                 tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
1149                 tunables->timer_rate = DEFAULT_TIMER_RATE;
1150                 tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
1151                 tunables->timer_slack_val = DEFAULT_TIMER_SLACK;
1152
1153                 spin_lock_init(&tunables->target_loads_lock);
1154                 spin_lock_init(&tunables->above_hispeed_delay_lock);
1155
1156                 if (!policy->governor->initialized) {
1157                         idle_notifier_register(&cpufreq_interactive_idle_nb);
1158                         cpufreq_register_notifier(&cpufreq_notifier_block,
1159                                         CPUFREQ_TRANSITION_NOTIFIER);
1160                 }
1161
1162                 policy->governor_data = tunables;
1163                 if (!have_governor_per_policy())
1164                         common_tunables = tunables;
1165
1166                 break;
1167
1168         case CPUFREQ_GOV_POLICY_EXIT:
1169                 if (!--tunables->usage_count) {
1170                         if (policy->governor->initialized == 1) {
1171                                 cpufreq_unregister_notifier(&cpufreq_notifier_block,
1172                                                 CPUFREQ_TRANSITION_NOTIFIER);
1173                                 idle_notifier_unregister(&cpufreq_interactive_idle_nb);
1174                         }
1175
1176                         sysfs_remove_group(get_governor_parent_kobj(policy),
1177                                         get_sysfs_attr());
1178                         kfree(tunables);
1179                         common_tunables = NULL;
1180                 }
1181
1182                 policy->governor_data = NULL;
1183                 break;
1184
1185         case CPUFREQ_GOV_START:
1186                 mutex_lock(&gov_lock);
1187
1188                 freq_table = cpufreq_frequency_get_table(policy->cpu);
1189                 if (!tunables->hispeed_freq)
1190                         tunables->hispeed_freq = policy->max;
1191
1192                 for_each_cpu(j, policy->cpus) {
1193                         pcpu = &per_cpu(cpuinfo, j);
1194                         pcpu->policy = policy;
1195                         pcpu->target_freq = policy->cur;
1196                         pcpu->freq_table = freq_table;
1197                         pcpu->floor_freq = pcpu->target_freq;
1198                         pcpu->floor_validate_time =
1199                                 ktime_to_us(ktime_get());
1200                         pcpu->hispeed_validate_time =
1201                                 pcpu->floor_validate_time;
1202                         down_write(&pcpu->enable_sem);
1203                         cpufreq_interactive_timer_start(tunables, j);
1204                         pcpu->governor_enabled = 1;
1205                         up_write(&pcpu->enable_sem);
1206                 }
1207
1208                 mutex_unlock(&gov_lock);
1209                 break;
1210
1211         case CPUFREQ_GOV_STOP:
1212                 mutex_lock(&gov_lock);
1213                 for_each_cpu(j, policy->cpus) {
1214                         pcpu = &per_cpu(cpuinfo, j);
1215                         down_write(&pcpu->enable_sem);
1216                         pcpu->governor_enabled = 0;
1217                         del_timer_sync(&pcpu->cpu_timer);
1218                         del_timer_sync(&pcpu->cpu_slack_timer);
1219                         up_write(&pcpu->enable_sem);
1220                 }
1221
1222                 mutex_unlock(&gov_lock);
1223                 break;
1224
1225         case CPUFREQ_GOV_LIMITS:
1226                 if (policy->max < policy->cur)
1227                         __cpufreq_driver_target(policy,
1228                                         policy->max, CPUFREQ_RELATION_H);
1229                 else if (policy->min > policy->cur)
1230                         __cpufreq_driver_target(policy,
1231                                         policy->min, CPUFREQ_RELATION_L);
1232                 for_each_cpu(j, policy->cpus) {
1233                         pcpu = &per_cpu(cpuinfo, j);
1234
1235                         /* hold write semaphore to avoid race */
1236                         down_write(&pcpu->enable_sem);
1237                         if (pcpu->governor_enabled == 0) {
1238                                 up_write(&pcpu->enable_sem);
1239                                 continue;
1240                         }
1241
1242                         /* update target_freq firstly */
1243                         if (policy->max < pcpu->target_freq)
1244                                 pcpu->target_freq = policy->max;
1245                         else if (policy->min > pcpu->target_freq)
1246                                 pcpu->target_freq = policy->min;
1247
1248                         /* Reschedule timer.
1249                          * Delete the timers, else the timer callback may
1250                          * return without re-arm the timer when failed
1251                          * acquire the semaphore. This race may cause timer
1252                          * stopped unexpectedly.
1253                          */
1254                         del_timer_sync(&pcpu->cpu_timer);
1255                         del_timer_sync(&pcpu->cpu_slack_timer);
1256                         cpufreq_interactive_timer_start(tunables, j);
1257                         up_write(&pcpu->enable_sem);
1258                 }
1259                 break;
1260         }
1261         return 0;
1262 }
1263
/*
 * Governor descriptor passed to cpufreq_register_governor().
 *
 * It is file-local (static) unless CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
 * is set, in which case the symbol gets external linkage.
 * NOTE(review): max_transition_latency is presumably in nanoseconds -
 * confirm against the cpufreq core.
 */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};
1273
/*
 * Callback installed on the per-CPU cpu_slack_timer.  It intentionally does
 * nothing; @data is ignored.
 */
static void cpufreq_interactive_nop_timer(unsigned long data)
{
        (void)data;
}
1277
1278 static int __init cpufreq_interactive_init(void)
1279 {
1280         unsigned int i;
1281         struct cpufreq_interactive_cpuinfo *pcpu;
1282         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
1283
1284         /* Initalize per-cpu timers */
1285         for_each_possible_cpu(i) {
1286                 pcpu = &per_cpu(cpuinfo, i);
1287                 init_timer_deferrable(&pcpu->cpu_timer);
1288                 pcpu->cpu_timer.function = cpufreq_interactive_timer;
1289                 pcpu->cpu_timer.data = i;
1290                 init_timer(&pcpu->cpu_slack_timer);
1291                 pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
1292                 spin_lock_init(&pcpu->load_lock);
1293                 init_rwsem(&pcpu->enable_sem);
1294         }
1295
1296         spin_lock_init(&speedchange_cpumask_lock);
1297         mutex_init(&gov_lock);
1298         speedchange_task =
1299                 kthread_create(cpufreq_interactive_speedchange_task, NULL,
1300                                "cfinteractive");
1301         if (IS_ERR(speedchange_task))
1302                 return PTR_ERR(speedchange_task);
1303
1304         sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
1305         get_task_struct(speedchange_task);
1306
1307         /* NB: wake up so the thread does not look hung to the freezer */
1308         wake_up_process(speedchange_task);
1309
1310         return cpufreq_register_governor(&cpufreq_gov_interactive);
1311 }
1312
/*
 * Hook up the init function: as an fs_initcall when this is the default
 * governor, as a regular module_init otherwise.
 * NOTE(review): the earlier initcall level is presumably needed so the
 * default governor is registered before cpufreq drivers look for it -
 * confirm.
 */
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif
1318
/*
 * Module exit: unregister the governor, stop the speed-change kthread and
 * drop the task reference taken with get_task_struct() in
 * cpufreq_interactive_init().
 */
static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(speedchange_task);
        put_task_struct(speedchange_task);
}
1325
/* Register the exit handler and declare the module metadata. */
module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "Latency sensitive workloads");
MODULE_LICENSE("GPL");