sched/core: Add first cpu w/ max/min orig capacity to root domain

[firefly-linux-kernel-4.4.55.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 42dfdd567179d1db4667ba7d2dd66d1e8efa7a99..495bc41907d6288122d7ab32428eb12f82324c97 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -89,6 +89,7 @@
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/sched.h>
+#include "walt.h"
  
  DEFINE_MUTEX(sched_domains_mutex);
  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -287,6 +288,18 @@ int sysctl_sched_rt_runtime = 950000;
  /* cpus with isolated domains */
  cpumask_var_t cpu_isolated_map;
  
+/*
+ * lock_rq_of - externally visible wrapper around task_rq_lock().
+ *
+ * @p and @flags are forwarded unchanged; the runqueue pointer handed back
+ * by task_rq_lock() is returned to the caller, who is expected to release
+ * it again through unlock_rq_of().
+ */
+struct rq *lock_rq_of(struct task_struct *p, unsigned long *flags)
+{
+       struct rq *rq = task_rq_lock(p, flags);
+
+       return rq;
+}
+
+/*
+ * unlock_rq_of - externally visible wrapper around task_rq_unlock().
+ *
+ * Counterpart of lock_rq_of(): @rq, @p and @flags are forwarded unchanged
+ * to task_rq_unlock().
+ */
+void unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags)
+{
+       task_rq_unlock(rq, p, flags);
+}
+
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
@@ -627,7 +640,10 @@ int get_nohz_timer_target(void)
         rcu_read_lock();
         for_each_domain(cpu, sd) {
                 for_each_cpu(i, sched_domain_span(sd)) {
-                       if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
+                       if (cpu == i)
+                               continue;
+
+                       if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
                                 cpu = i;
                                 goto unlock;
                         }
@@ -1073,7 +1089,9 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
  
         dequeue_task(rq, p, 0);
         p->on_rq = TASK_ON_RQ_MIGRATING;
+       double_lock_balance(rq, cpu_rq(new_cpu));
         set_task_cpu(p, new_cpu);
+       double_unlock_balance(rq, cpu_rq(new_cpu));
         raw_spin_unlock(&rq->lock);
  
         rq = cpu_rq(new_cpu);
@@ -1297,6 +1315,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                         p->sched_class->migrate_task_rq(p);
                 p->se.nr_migrations++;
                 perf_event_task_migrate(p);
+
+               walt_fixup_busy_time(p, new_cpu);
         }
  
         __set_task_cpu(p, new_cpu);
@@ -1925,6 +1945,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
  {
         unsigned long flags;
         int cpu, success = 0;
+#ifdef CONFIG_SMP
+       struct rq *rq;
+       u64 wallclock;
+#endif
  
         /*
          * If we are going to wake up a thread waiting for CONDITION we
@@ -1942,6 +1966,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
  
+       /*
+        * Ensure we load p->on_rq _after_ p->state, otherwise it would
+        * be possible to, falsely, observe p->on_rq == 0 and get stuck
+        * in smp_cond_load_acquire() below.
+        *
+        * sched_ttwu_pending()                 try_to_wake_up()
+        *   [S] p->on_rq = 1;                  [L] p->state
+        *       UNLOCK rq->lock  -----.
+        *                              \
+        *                               +---   RMB
+        * schedule()                   /
+        *       LOCK rq->lock    -----'
+        *       UNLOCK rq->lock
+        *
+        * [task p]
+        *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+        *
+        * Pairs with the UNLOCK+LOCK on rq->lock from the
+        * last wakeup of our task and the schedule that got our task
+        * current.
+        */
+       smp_rmb();
         if (p->on_rq && ttwu_remote(p, wake_flags))
                 goto stat;
  
@@ -1982,6 +2028,14 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         smp_rmb();
  
+       rq = cpu_rq(task_cpu(p));
+
+       raw_spin_lock(&rq->lock);
+       wallclock = walt_ktime_clock();
+       walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+       walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
+       raw_spin_unlock(&rq->lock);
+
         p->sched_contributes_to_load = !!task_contributes_to_load(p);
         p->state = TASK_WAKING;
  
@@ -1989,10 +2043,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                 p->sched_class->task_waking(p);
  
         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+
         if (task_cpu(p) != cpu) {
                 wake_flags |= WF_MIGRATED;
                 set_task_cpu(p, cpu);
         }
+
  #endif /* CONFIG_SMP */
  
         ttwu_queue(p, cpu);
@@ -2041,8 +2097,13 @@ static void try_to_wake_up_local(struct task_struct *p)
  
         trace_sched_waking(p);
  
-       if (!task_on_rq_queued(p))
+       if (!task_on_rq_queued(p)) {
+               u64 wallclock = walt_ktime_clock();
+
+               walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+               walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+       }
  
         ttwu_do_wakeup(rq, p, 0);
         ttwu_stat(p, smp_processor_id(), 0);
@@ -2108,6 +2169,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->se.nr_migrations             = 0;
         p->se.vruntime                  = 0;
         INIT_LIST_HEAD(&p->se.group_node);
+       walt_init_new_task_load(p);
  
  #ifdef CONFIG_SCHEDSTATS
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2375,6 +2437,9 @@ void wake_up_new_task(struct task_struct *p)
         struct rq *rq;
  
         raw_spin_lock_irqsave(&p->pi_lock, flags);
+
+       walt_init_new_task_load(p);
+
         /* Initialize new task's runnable average */
         init_entity_runnable_average(&p->se);
  #ifdef CONFIG_SMP
@@ -2387,7 +2452,8 @@ void wake_up_new_task(struct task_struct *p)
  #endif
  
         rq = __task_rq_lock(p);
-       activate_task(rq, p, 0);
+       walt_mark_task_starting(p);
+       activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
         p->on_rq = TASK_ON_RQ_QUEUED;
         trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
@@ -2768,6 +2834,36 @@ unsigned long nr_iowait_cpu(int cpu)
         return atomic_read(&this->nr_iowait);
  }
  
+#ifdef CONFIG_CPU_QUIET
+/*
+ * nr_running_integral - read the nr_running integral of @cpu's runqueue.
+ *
+ * Returns 0 when @cpu is not below nr_cpu_ids. Otherwise the result is
+ * the value computed by do_nr_running_integral(), unless the runqueue's
+ * ave_seqcnt sequence counter changed while it was being computed; in
+ * that case the stored rq->nr_running_integral is returned instead.
+ */
+u64 nr_running_integral(unsigned int cpu)
+{
+       unsigned int seqcnt;
+       u64 integral;
+       struct rq *q;
+
+       if (cpu >= nr_cpu_ids)
+               return 0;
+
+       q = cpu_rq(cpu);
+
+       /*
+        * Update the average to avoid reading a stale value if there were
+        * no run-queue changes for a long time. On the other hand, if
+        * changes are happening right now, just read the current value
+        * directly.
+        */
+
+       seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+       integral = do_nr_running_integral(q);
+       if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+               /*
+                * NOTE(review): the return value is discarded and the read
+                * below is not re-validated with read_seqcount_retry();
+                * presumably this call only serves to wait for the
+                * concurrent update to finish - confirm.
+                */
+               read_seqcount_begin(&q->ave_seqcnt);
+               integral = q->nr_running_integral;
+       }
+
+       return integral;
+}
+#endif
+
  void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
  {
         struct rq *rq = this_rq();
@@ -2854,6 +2950,93 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         return ns;
  }
  
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+
+/*
+ * Scale @cpu_capacity by capacity_margin / SCHED_CAPACITY_SCALE.
+ *
+ * The multiplication is carried out first, so the integer division
+ * truncates only once.
+ */
+static inline
+unsigned long add_capacity_margin(unsigned long cpu_capacity)
+{
+       unsigned long scaled = cpu_capacity * capacity_margin;
+
+       return scaled / SCHED_CAPACITY_SCALE;
+}
+
+/*
+ * Combine the capacity requests recorded in @scr with @cfs_cap.
+ *
+ * The margin from add_capacity_margin() is applied to the sum of
+ * @cfs_cap and scr->rt; scr->dl is added afterwards without a margin.
+ */
+static inline
+unsigned long sum_capacity_reqs(unsigned long cfs_cap,
+                               struct sched_capacity_reqs *scr)
+{
+       unsigned long with_margin = add_capacity_margin(cfs_cap + scr->rt);
+
+       return with_margin + scr->dl;
+}
+
+/*
+ * Tick-time capacity request for @cpu on the PELT path.
+ *
+ * The candidate request is always capacity_max. It is handed to
+ * set_cfs_cpu_capacity() only when the combined requests (see
+ * sum_capacity_reqs()) are not below the CPU's current capacity.
+ */
+static void sched_freq_tick_pelt(int cpu)
+{
+       unsigned long util = capacity_max;
+       unsigned long curr_cap = capacity_curr_of(cpu);
+       struct sched_capacity_reqs *reqs;
+
+       reqs = &per_cpu(cpu_sched_capacity_reqs, cpu);
+
+       /*
+        * A task that is still building up its "real" utilization should
+        * be harmed as little as possible: as soon as the margin of free
+        * capacity (capacity_margin) is eaten into, ask for a jump to a
+        * higher OPP.
+        */
+       if (sum_capacity_reqs(util, reqs) >= curr_cap)
+               set_cfs_cpu_capacity(cpu, true, util);
+}
+
+#ifdef CONFIG_SCHED_WALT
+/*
+ * Tick-time capacity request for @cpu on the WALT path.
+ *
+ * Defers to sched_freq_tick_pelt() when walt_disabled is set or
+ * sysctl_sched_use_walt_cpu_util is clear. Otherwise the margin-adjusted
+ * cpu_util() value is requested whenever it exceeds the CPU's current
+ * capacity.
+ */
+static void sched_freq_tick_walt(int cpu)
+{
+       unsigned long util = cpu_util(cpu);
+       unsigned long curr_cap = capacity_curr_of(cpu);
+
+       if (walt_disabled || !sysctl_sched_use_walt_cpu_util) {
+               sched_freq_tick_pelt(cpu);
+               return;
+       }
+
+       /*
+        * The margin goes on top of the WALT utilization.
+        * NOTE: WALT keeps one CPU signal for all scheduling classes, so
+        * the DL class gets the margin too - unlike what is done in the
+        * sched_freq_tick_pelt() case.
+        */
+       util = add_capacity_margin(util);
+
+       /*
+        * The load is likely growing, so the margin stays in the request
+        * as an extra boost.
+        */
+       if (util > curr_cap)
+               set_cfs_cpu_capacity(cpu, true, util);
+}
+#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
+#else
+#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
+#endif /* CONFIG_SCHED_WALT */
+
+/*
+ * sched_freq_tick - per-tick capacity request hook for @cpu.
+ *
+ * Invoked from scheduler_tick() while rq->lock is held. Nothing happens
+ * unless sched_freq() is true and the CPU's current capacity differs
+ * from its original capacity; in that case the request is delegated to
+ * _sched_freq_tick(). Without CONFIG_CPU_FREQ_GOV_SCHED this is an empty
+ * stub.
+ */
+static void sched_freq_tick(int cpu)
+{
+       unsigned long orig_cap, curr_cap;
+
+       if (sched_freq()) {
+               orig_cap = capacity_orig_of(cpu);
+               curr_cap = capacity_curr_of(cpu);
+
+               if (curr_cap != orig_cap)
+                       _sched_freq_tick(cpu);
+       }
+}
+#else
+static inline void sched_freq_tick(int cpu) { }
+#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
+
  /*
   * This function gets called by the timer code, with HZ frequency.
   * We call it with interrupts disabled.
@@ -2867,10 +3050,14 @@ void scheduler_tick(void)
         sched_clock_tick();
  
         raw_spin_lock(&rq->lock);
+       walt_set_window_start(rq);
         update_rq_clock(rq);
         curr->sched_class->task_tick(rq, curr, 0);
         update_cpu_load_active(rq);
+       walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+                       walt_ktime_clock(), 0);
         calc_global_load_tick(rq);
+       sched_freq_tick(cpu);
         raw_spin_unlock(&rq->lock);
  
         perf_event_task_tick();
@@ -3107,6 +3294,7 @@ static void __sched notrace __schedule(bool preempt)
         unsigned long *switch_count;
         struct rq *rq;
         int cpu;
+       u64 wallclock;
  
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
@@ -3168,6 +3356,9 @@ static void __sched notrace __schedule(bool preempt)
                 update_rq_clock(rq);
  
         next = pick_next_task(rq, prev);
+       wallclock = walt_ktime_clock();
+       walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+       walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
         clear_tsk_need_resched(prev);
         clear_preempt_need_resched();
         rq->clock_skip_update = 0;
@@ -4994,6 +5185,7 @@ void init_idle(struct task_struct *idle, int cpu)
         raw_spin_lock(&rq->lock);
  
         __sched_fork(0, idle);
+
         idle->state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
  
@@ -5375,10 +5567,61 @@ set_table_entry(struct ctl_table *entry,
         }
  }
  
+/*
+ * Build the sysctl table that exposes the fields of @sge.
+ *
+ * Five entries are requested from sd_alloc_ctl_entry(); the first four
+ * are filled in ("nr_idle_states", "idle_states", "nr_cap_states",
+ * "cap_states") and the fifth is left untouched (presumably the table
+ * terminator - confirm against sd_alloc_ctl_entry()).
+ *
+ * Returns the new table, or NULL when the allocation fails.
+ */
+static struct ctl_table *
+sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
+{
+       struct ctl_table *t = sd_alloc_ctl_entry(5);
+
+       if (!t)
+               return NULL;
+
+       set_table_entry(&t[0], "nr_idle_states", &sge->nr_idle_states,
+                       sizeof(int), 0644, proc_dointvec_minmax, false);
+
+       /* Exposed as one vector starting at the first state's power field. */
+       set_table_entry(&t[1], "idle_states", &sge->idle_states[0].power,
+                       sge->nr_idle_states * sizeof(struct idle_state),
+                       0644, proc_doulongvec_minmax, false);
+
+       set_table_entry(&t[2], "nr_cap_states", &sge->nr_cap_states,
+                       sizeof(int), 0644, proc_dointvec_minmax, false);
+
+       /* Exposed as one vector starting at the first state's cap field. */
+       set_table_entry(&t[3], "cap_states", &sge->cap_states[0].cap,
+                       sge->nr_cap_states * sizeof(struct capacity_state),
+                       0644, proc_doulongvec_minmax, false);
+
+       return t;
+}
+
+/*
+ * Build the sysctl table for one sched_group: a single entry named
+ * "energy" (mode 0555) whose ->child is the table produced by
+ * sd_alloc_ctl_energy_table() for sg->sge.
+ *
+ * Returns the new table, or NULL when sd_alloc_ctl_entry() fails.
+ * NOTE(review): the results of kstrdup() and of
+ * sd_alloc_ctl_energy_table() are stored without a NULL check.
+ */
+static struct ctl_table *
+sd_alloc_ctl_group_table(struct sched_group *sg)
+{
+       struct ctl_table *dir = sd_alloc_ctl_entry(2);
+       struct sched_group_energy *sge;
+
+       if (!dir)
+               return NULL;
+
+       sge = (struct sched_group_energy *)sg->sge;
+
+       dir->procname = kstrdup("energy", GFP_KERNEL);
+       dir->mode = 0555;
+       dir->child = sd_alloc_ctl_energy_table(sge);
+
+       return dir;
+}
+
  static struct ctl_table *
  sd_alloc_ctl_domain_table(struct sched_domain *sd)
  {
-       struct ctl_table *table = sd_alloc_ctl_entry(14);
+       struct ctl_table *table;
+       unsigned int nr_entries = 14;
+
+       int i = 0;
+       struct sched_group *sg = sd->groups;
+
+       if (sg->sge) {
+               int nr_sgs = 0;
+
+               do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
+
+               nr_entries += nr_sgs;
+       }
+
+       table = sd_alloc_ctl_entry(nr_entries);
  
         if (table == NULL)
                 return NULL;
@@ -5411,7 +5654,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
                 sizeof(long), 0644, proc_doulongvec_minmax, false);
         set_table_entry(&table[12], "name", sd->name,
                 CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-       /* &table[13] is terminator */
+       sg = sd->groups;
+       if (sg->sge) {
+               char buf[32];
+               struct ctl_table *entry = &table[13];
+
+               do {
+                       snprintf(buf, 32, "group%d", i);
+                       entry->procname = kstrdup(buf, GFP_KERNEL);
+                       entry->mode = 0555;
+                       entry->child = sd_alloc_ctl_group_table(sg);
+               } while (entry++, i++, sg = sg->next, sg != sd->groups);
+       }
+       /* &table[nr_entries-1] is terminator */
  
         return table;
  }
@@ -5527,6 +5782,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
         switch (action & ~CPU_TASKS_FROZEN) {
  
         case CPU_UP_PREPARE:
+               raw_spin_lock_irqsave(&rq->lock, flags);
+               walt_set_window_start(rq);
+               raw_spin_unlock_irqrestore(&rq->lock, flags);
                 rq->calc_load_update = calc_load_update;
                 account_reset_rq(rq);
                 break;
@@ -5547,6 +5805,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 sched_ttwu_pending();
                 /* Update our root-domain */
                 raw_spin_lock_irqsave(&rq->lock, flags);
+               walt_migrate_sync_cpu(cpu);
                 if (rq->rd) {
                         BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                         set_rq_offline(rq);
@@ -5718,7 +5977,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 printk(KERN_CONT " %*pbl",
                        cpumask_pr_args(sched_group_cpus(group)));
                 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-                       printk(KERN_CONT " (cpu_capacity = %d)",
+                       printk(KERN_CONT " (cpu_capacity = %lu)",
                                 group->sgc->capacity);
                 }
  
@@ -5778,6 +6037,7 @@ static int sd_degenerate(struct sched_domain *sd)
                          SD_BALANCE_FORK |
                          SD_BALANCE_EXEC |
                          SD_SHARE_CPUCAPACITY |
+                        SD_ASYM_CPUCAPACITY |
                          SD_SHARE_PKG_RESOURCES |
                          SD_SHARE_POWERDOMAIN |
                          SD_SHARE_CAP_STATES)) {
@@ -5809,6 +6069,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
                                 SD_BALANCE_NEWIDLE |
                                 SD_BALANCE_FORK |
                                 SD_BALANCE_EXEC |
+                               SD_ASYM_CPUCAPACITY |
                                 SD_SHARE_CPUCAPACITY |
                                 SD_SHARE_PKG_RESOURCES |
                                 SD_PREFER_SIBLING |
@@ -5892,6 +6153,11 @@ static int init_rootdomain(struct root_domain *rd)
  
         if (cpupri_init(&rd->cpupri) != 0)
                 goto free_rto_mask;
+
+       init_max_cpu_capacity(&rd->max_cpu_capacity);
+
+       rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;
+
         return 0;
  
  free_rto_mask:
@@ -5998,6 +6264,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_numa);
  DEFINE_PER_CPU(struct sched_domain *, sd_busy);
  DEFINE_PER_CPU(struct sched_domain *, sd_asym);
  DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+DEFINE_PER_CPU(struct sched_domain *, sd_scs);
  
  static void update_top_cache_domain(int cpu)
  {
@@ -6031,6 +6298,9 @@ static void update_top_cache_domain(int cpu)
                         break;
         }
         rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+
+       sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+       rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
  }
  
  /*
@@ -6191,6 +6461,8 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                  * die on a /0 trap.
                  */
                 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+               sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
+               sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
  
                 /*
                  * Make sure the first group of this domain contains the
@@ -6483,11 +6755,19 @@ static int sched_domains_curr_level;
  /*
   * SD_flags allowed in topology descriptions.
   *
- * SD_SHARE_CPUCAPACITY      - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA                - describes NUMA topologies
- * SD_SHARE_POWERDOMAIN   - describes shared power domain
- * SD_SHARE_CAP_STATES    - describes shared capacity states
+ * These flags are purely descriptive of the topology and do not prescribe
+ * behaviour. Behaviour is artificial and mapped in the below sd_init()
+ * function:
+ *
+ *   SD_SHARE_CPUCAPACITY   - describes SMT topologies
+ *   SD_SHARE_PKG_RESOURCES - describes shared caches
+ *   SD_NUMA                - describes NUMA topologies
+ *   SD_SHARE_POWERDOMAIN   - describes shared power domain
+ *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
+ *   SD_SHARE_CAP_STATES    - describes shared capacity states
+ *
+ * The remaining flag, besides describing a topology quirk, also
+ * prescribes the desired behaviour that goes along with it:
   *
   * Odd one out:
   * SD_ASYM_PACKING        - describes SMT quirks
@@ -6497,11 +6777,13 @@ static int sched_domains_curr_level;
          SD_SHARE_PKG_RESOURCES |       \
          SD_NUMA |                      \
          SD_ASYM_PACKING |              \
+        SD_ASYM_CPUCAPACITY |          \
          SD_SHARE_POWERDOMAIN |         \
          SD_SHARE_CAP_STATES)
  
  static struct sched_domain *
-sd_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl,
+       struct sched_domain *child, int cpu)
  {
         struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
         int sd_weight, sd_flags = 0;
@@ -6553,6 +6835,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
                 .smt_gain               = 0,
                 .max_newidle_lb_cost    = 0,
                 .next_decay_max_lb_cost = jiffies,
+               .child                  = child,
  #ifdef CONFIG_SCHED_DEBUG
                 .name                   = tl->name,
  #endif
@@ -6562,6 +6845,13 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
          * Convert topological properties into behaviour.
          */
  
+       if (sd->flags & SD_ASYM_CPUCAPACITY) {
+               struct sched_domain *t = sd;
+
+               for_each_lower_domain(t)
+                       t->flags |= SD_BALANCE_WAKE;
+       }
+
         if (sd->flags & SD_SHARE_CPUCAPACITY) {
                 sd->flags |= SD_PREFER_SIBLING;
                 sd->imbalance_pct = 110;
@@ -7008,16 +7298,13 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
                 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
                 struct sched_domain *child, int cpu)
  {
-       struct sched_domain *sd = sd_init(tl, cpu);
-       if (!sd)
-               return child;
+       struct sched_domain *sd = sd_init(tl, child, cpu);
  
         cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
         if (child) {
                 sd->level = child->level + 1;
                 sched_domain_level_max = max(sched_domain_level_max, sd->level);
                 child->parent = sd;
-               sd->child = child;
  
                 if (!cpumask_subset(sched_domain_span(child),
                                     sched_domain_span(sd))) {
@@ -7048,7 +7335,6 @@ static int build_sched_domains(const struct cpumask *cpu_map,
         enum s_alloc alloc_state;
         struct sched_domain *sd;
         struct s_data d;
-       struct rq *rq = NULL;
         int i, ret = -ENOMEM;
  
         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -7102,18 +7388,23 @@ static int build_sched_domains(const struct cpumask *cpu_map,
         /* Attach the domains */
         rcu_read_lock();
         for_each_cpu(i, cpu_map) {
-               rq = cpu_rq(i);
+               int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
+               int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);
+
+               if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig >
+                   cpu_rq(max_cpu)->cpu_capacity_orig))
+                       WRITE_ONCE(d.rd->max_cap_orig_cpu, i);
+
+               if ((min_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig <
+                   cpu_rq(min_cpu)->cpu_capacity_orig))
+                       WRITE_ONCE(d.rd->min_cap_orig_cpu, i);
+
                 sd = *per_cpu_ptr(d.sd, i);
-               cpu_attach_domain(sd, d.rd, i);
  
-               if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity)
-                       rq->rd->max_cpu_capacity = rq->cpu_capacity_orig;
+               cpu_attach_domain(sd, d.rd, i);
         }
         rcu_read_unlock();
  
-       if (rq)
-               pr_info("max cpu_capacity %lu\n", rq->rd->max_cpu_capacity);
-
         ret = 0;
  error:
         __free_domain_allocs(&d, alloc_state, cpu_map);
@@ -7390,6 +7681,7 @@ void __init sched_init_smp(void)
  {
         cpumask_var_t non_isolated_cpus;
  
+       walt_init_cpu_efficiency();
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
  
@@ -7567,6 +7859,11 @@ void __init sched_init(void)
                 rq->idle_stamp = 0;
                 rq->avg_idle = 2*sysctl_sched_migration_cost;
                 rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_WALT
+               rq->cur_irqload = 0;
+               rq->avg_irqload = 0;
+               rq->irqload_ts = 0;
+#endif
  
                 INIT_LIST_HEAD(&rq->cfs_tasks);
  
@@ -8690,7 +8987,6 @@ struct cgroup_subsys cpu_cgrp_subsys = {
         .fork           = cpu_cgroup_fork,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
-       .allow_attach   = subsys_cgroup_allow_attach,
         .legacy_cftypes = cpu_files,
         .early_init     = 1,
  };