Revert "cpufreq: ondemand: Don't synchronize sample rate unless multiple cpus present"
/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL         (10)
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#ifdef CONFIG_ARCH_RK29
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (10)
#define MICRO_FREQUENCY_UP_THRESHOLD            (80)
#else
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (3)
#define MICRO_FREQUENCY_UP_THRESHOLD            (95)
#endif
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE         (10000)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)
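/*
 * For example: a CPU with a 10 us transition latency gets a default
 * sampling rate of 10 * LATENCY_MULTIPLIER = 10000 us (subject to the
 * min_sampling_rate floor below), while a driver reporting more than
 * TRANSITION_LATENCY_LIMIT (10 ms, in ns) is rejected through the
 * governor's max_transition_latency field.
 */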

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
       .name                   = "ondemand",
       .governor               = cpufreq_governor_dbs,
       .max_transition_latency = TRANSITION_LATENCY_LIMIT,
       .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
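/*
 * DBS_NORMAL_SAMPLE evaluates the load; DBS_SUB_SAMPLE is the second
 * phase of a powersave_bias cycle, spent at freq_lo (see do_dbs_timer()).
 */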

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        struct cpufreq_frequency_table *freq_table;
        unsigned int freq_lo;
        unsigned int freq_lo_jiffies;
        unsigned int freq_hi_jiffies;
        int cpu;
        unsigned int sample_type:1;
        /*
         * percpu mutex that serializes governor limit change with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct  *kondemand_wq;

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int down_differential;
        unsigned int ignore_nice;
        unsigned int powersave_bias;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
#ifdef CONFIG_ARCH_RK29
        .ignore_nice = 1,
#else
        .ignore_nice = 0,
#endif
        .powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
                                                        cputime64_t *wall)
{
        cputime64_t idle_time;
        cputime64_t cur_wall_time;
        cputime64_t busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
        busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
                        kstat_cpu(cpu).cpustat.system);

        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
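        /*
         * Note that nice time is counted as busy here; when ignore_nice
         * is set, dbs_check_cpu() adds it back into idle_time instead.
         */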

        idle_time = cputime64_sub(cur_wall_time, busy_time);
        if (wall)
                *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

        return (cputime64_t)jiffies_to_usecs(idle_time);
}

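/*
 * Prefer NO_HZ micro-accounted idle time; get_cpu_idle_time_us()
 * returns -1ULL when it is unavailable, in which case we fall back to
 * jiffy-granularity accounting.
 */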
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, wall);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);

        return idle_time;
}

/*
 * Find the right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in the percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
                                          unsigned int freq_next,
                                          unsigned int relation)
{
        unsigned int freq_req, freq_reduc, freq_avg;
        unsigned int freq_hi, freq_lo;
        unsigned int index = 0;
        unsigned int jiffies_total, jiffies_hi, jiffies_lo;
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
                                                   policy->cpu);

        if (!dbs_info->freq_table) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_next;
        }

        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
                        relation, &index);
        freq_req = dbs_info->freq_table[index].frequency;
        freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
        freq_avg = freq_req - freq_reduc;

        /* Find freq bounds for freq_avg in freq_table */
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_H, &index);
        freq_lo = dbs_info->freq_table[index].frequency;
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_L, &index);
        freq_hi = dbs_info->freq_table[index].frequency;

        /* Find out how long we have to be in hi and lo freqs */
        if (freq_hi == freq_lo) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_lo;
        }
        jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
        jiffies_hi += ((freq_hi - freq_lo) / 2);
        jiffies_hi /= (freq_hi - freq_lo);
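        /*
         * Worked example (hypothetical numbers): with table entries of
         * 500000 and 800000 kHz, powersave_bias = 100 and freq_req =
         * 800000 kHz, freq_avg = 800000 - 80000 = 720000 kHz. With
         * jiffies_total = 10, jiffies_hi = (220000 * 10 + 150000) /
         * 300000 = 7, so we spend 7 jiffies at 800000 kHz and 3 at
         * 500000 kHz, averaging roughly 710000 kHz.
         */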
        jiffies_lo = jiffies_total - jiffies_hi;
        dbs_info->freq_lo = freq_lo;
        dbs_info->freq_lo_jiffies = jiffies_lo;
        dbs_info->freq_hi_jiffies = jiffies_hi;
        return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
        dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
        int i;
        for_each_online_cpu(i) {
                ondemand_powersave_bias_init_cpu(i);
        }
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
               "sysfs file is deprecated - used by: %s\n", current->comm);
        return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

#define define_one_ro(_name)            \
static struct global_attr _name =       \
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct kobject *kobj, struct attribute *attr, char *buf)              \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)                                      \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)                                         \
static ssize_t show_##file_name##_old                                   \
(struct cpufreq_policy *unused, char *buf)                              \
{                                                                       \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");      \
        return show_##file_name(NULL, NULL, buf);                       \
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

#define define_one_ro_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0444, show_##_name##_old, NULL)

define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
define_one_ro_old(sampling_rate_max_old, sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

        }
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 1000)
                input = 1000;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.powersave_bias = input;
        ondemand_powersave_bias_init();
        mutex_unlock(&dbs_mutex);

        return count;
}

#define define_one_rw(_name) \
static struct global_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
        &powersave_bias.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)                                        \
static ssize_t store_##file_name##_old                                  \
(struct cpufreq_policy *unused, const char *buf, size_t count)          \
{                                                                       \
       printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "        \
                   "interface is deprecated - " #file_name "\n");       \
       return store_##file_name(NULL, NULL, buf, count);                \
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

#define define_one_rw_old(object, _name)       \
static struct freq_attr object =               \
__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)

define_one_rw_old(sampling_rate_old, sampling_rate);
define_one_rw_old(up_threshold_old, up_threshold);
define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
define_one_rw_old(powersave_bias_old, powersave_bias);

static struct attribute *dbs_attributes_old[] = {
       &sampling_rate_max_old.attr,
       &sampling_rate_min_old.attr,
       &sampling_rate_old.attr,
       &up_threshold_old.attr,
       &ignore_nice_load_old.attr,
       &powersave_bias_old.attr,
       NULL
};

static struct attribute_group dbs_attr_group_old = {
       .attrs = dbs_attributes_old,
       .name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int max_load_freq;

        struct cpufreq_policy *policy;
        unsigned int j;

        this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate, we check if the current idle time is less
         * than 20% (default); if it is, we try to increase the frequency.
         * Every sampling_rate, we also look for the lowest frequency
         * which can sustain the load while keeping idle time over
         * 30%. If such a frequency exists, we try to decrease to it.
         *
         * Any frequency increase takes it to the maximum frequency.
         * Frequency reduction happens at minimum steps of
         * 5% (default) of current frequency.
         */
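        /*
         * Worked example (hypothetical numbers): with up_threshold = 80
         * and a CPU running at 1000000 kHz under 85% load, max_load_freq
         * = 85 * 1000000 > 80 * 1000000, so we ramp straight to
         * policy->max below.
         */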

        /* Get Absolute Load - in terms of freq */
        max_load_freq = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load, load_freq;
                int freq_avg;

                j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

                wall_time = (unsigned int) cputime64_sub(cur_wall_time,
                                j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int) cputime64_sub(cur_idle_time,
                                j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                if (dbs_tuners_ins.ignore_nice) {
                        cputime64_t cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
                                         j_dbs_info->prev_cpu_nice);
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies for 32 bit sys
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                freq_avg = __cpufreq_driver_getavg(policy, j);
                if (freq_avg <= 0)
                        freq_avg = policy->cur;

                load_freq = load * freq_avg;
                if (load_freq > max_load_freq)
                        max_load_freq = load_freq;
        }

        /* Check for frequency increase */
        if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                /* if we are already at full speed then break out early */
                if (!dbs_tuners_ins.powersave_bias) {
                        if (policy->cur == policy->max)
                                return;

                        __cpufreq_driver_target(policy, policy->max,
                                CPUFREQ_RELATION_H);
                } else {
                        int freq = powersave_bias_target(policy, policy->max,
                                        CPUFREQ_RELATION_H);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we focus 10 points under the threshold.
         */
        if (max_load_freq <
            (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
             policy->cur) {
                unsigned int freq_next;
                freq_next = max_load_freq /
                                (dbs_tuners_ins.up_threshold -
                                 dbs_tuners_ins.down_differential);
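                /*
                 * Worked example (hypothetical numbers): with
                 * up_threshold = 80 and down_differential = 10, a 40%
                 * load at 1000000 kHz gives freq_next = 40000000 / 70,
                 * roughly 571428 kHz; CPUFREQ_RELATION_L then picks the
                 * lowest table frequency at or above that.
                 */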

                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                } else {
                        int freq = powersave_bias_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
        }
}

static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;
        int sample_type = dbs_info->sample_type;

        /* We want all CPUs to do sampling nearly on same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        delay -= jiffies % delay;
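        /*
         * e.g. with a 10-jiffy sampling period, if jiffies % 10 == 3
         * the next delay is shortened to 7, so every CPU's timer fires
         * on a multiple of 10 jiffies and the samples stay aligned;
         * this revert makes the alignment unconditional again rather
         * than dependent on multiple CPUs being present.
         */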
        mutex_lock(&dbs_info->timer_mutex);

        /* Common NORMAL_SAMPLE setup */
        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        if (!dbs_tuners_ins.powersave_bias ||
            sample_type == DBS_NORMAL_SAMPLE) {
                dbs_check_cpu(dbs_info);
                if (dbs_info->freq_lo) {
                        /* Setup timer for SUB_SAMPLE */
                        dbs_info->sample_type = DBS_SUB_SAMPLE;
                        delay = dbs_info->freq_hi_jiffies;
                }
        } else {
                __cpufreq_driver_target(dbs_info->cur_policy,
                        dbs_info->freq_lo, CPUFREQ_RELATION_H);
        }
        queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        delay -= jiffies % delay;

        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
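        /*
         * Deferrable work: the timer does not wake an idle CPU, so
         * samples may be skipped while the CPU sleeps (see the comment
         * in cpufreq_gov_dbs_init()).
         */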
        queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
                delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        cancel_delayed_work_sync(&dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
                if (rc) {
                        mutex_unlock(&dbs_mutex);
                        return rc;
                }

                dbs_enable++;
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice) {
                                j_dbs_info->prev_cpu_nice =
                                                kstat_cpu(j).cpustat.nice;
                        }
                }
                this_dbs_info->cpu = cpu;
                ondemand_powersave_bias_init_cpu(cpu);
                /*
                 * Start the timer-scheduled work when this governor
                 * is used for the first time
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);
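                        /*
                         * e.g. a hypothetical 1 ms transition latency
                         * gives latency = 1000 us, raising
                         * min_sampling_rate to at least 100 * 1000 =
                         * 100000 us and setting the default sampling
                         * rate to 1000 * 1000 us = 1 s.
                         */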
                }
                mutex_unlock(&dbs_mutex);

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_timer_init(this_dbs_info);
                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
                mutex_destroy(&this_dbs_info->timer_mutex);
                dbs_enable--;
                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&this_dbs_info->timer_mutex);
                break;
        }
        return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
        int err;
        cputime64_t wall;
        u64 idle_time;
        int cpu = get_cpu();

        idle_time = get_cpu_idle_time_us(cpu, &wall);
        put_cpu();
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
                dbs_tuners_ins.down_differential =
                                        MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In the NO_HZ/micro-accounting case the minimum sampling
                 * rate is fixed (very low) and does not depend on HZ. The
                 * deferrable timer may skip some samples while the CPU is
                 * idle/sleeping, as intended.
                 */
                min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                /* For correct statistics, we need 10 ticks for each measure */
                min_sampling_rate =
                        MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
        }
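        /*
         * e.g. with HZ=100 a jiffy is 10 ms, so the tick-based minimum
         * is 2 * 100000 us = 200 ms, versus the fixed 10 ms minimum
         * when micro-accounting is available.
         */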

        kondemand_wq = create_workqueue("kondemand");
        if (!kondemand_wq) {
                printk(KERN_ERR "Creation of kondemand failed\n");
                return -EFAULT;
        }
        err = cpufreq_register_governor(&cpufreq_gov_ondemand);
        if (err)
                destroy_workqueue(kondemand_wq);

        return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_ondemand);
        destroy_workqueue(kondemand_wq);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
        "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);