/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

#define SD_NODES_PER_DOMAIN 16
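
/*
 * One node-level sched_domain spans at most this many nodes; cpu maps
 * too large for that get an additional "allnodes" domain on top (see
 * build_sched_domains() below).
 */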

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node and wrap around MAX_NUMNODES */
		n = (node + i) % MAX_NUMNODES;

		/* Skip nodes with no cpus */
		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);

		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif
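
/*
 * At the physical-package level, all SMT siblings of a cpu map to the
 * group of the first cpu in their sibling mask; without SMT, each cpu
 * is its own group.
 */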
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own. Each node gets its own list of groups, which
 * is allocated dynamically.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif
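
/*
 * The hierarchy built below, bottom-up: SMT siblings (when
 * CONFIG_SCHED_SMT is enabled), physical cpus within a node, the node
 * span from sched_domain_node_span(), and, on sufficiently large
 * systems, an all-nodes domain covering the whole cpu_map.
 */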

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus.
 */
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;
#ifdef CONFIG_NUMA
	struct sched_group **sched_group_nodes = NULL;
	struct sched_group *sched_group_allnodes = NULL;

	/*
	 * Allocate the per-node list of sched groups (GFP_KERNEL is an
	 * assumption here, consistent with the group allocations below).
	 */
	sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
				    GFP_KERNEL);
	if (!sched_group_nodes) {
		printk(KERN_WARNING "Can not alloc sched group node list\n");
		return;
	}
	/* Remember the list under the first cpu so we can free it later */
	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
		/*
		 * If there are many more cpus than a single node-level
		 * domain may span, put an "allnodes" domain on top.
		 */
		if (cpus_weight(*cpu_map)
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			if (!sched_group_allnodes) {
				sched_group_allnodes
					= kmalloc(sizeof(struct sched_group)
							* MAX_NUMNODES,
						  GFP_KERNEL);
				if (!sched_group_allnodes) {
					printk(KERN_WARNING
					"Can not alloc allnodes sched group\n");
					break;
				}
				sched_group_allnodes_bycpu[i]
						= sched_group_allnodes;
			}
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}
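
	/*
	 * The per-cpu domains now exist; next, build the sched_groups
	 * that each domain level balances over.
	 */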

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		/* Build each sibling group once, from its first cpu */
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	if (sched_group_allnodes)
		init_sched_build_groups(sched_group_allnodes, *cpu_map,
					&cpu_to_allnodes_group);
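
	/*
	 * For each node, build a circular list of groups covering its
	 * domain span: the node's own cpus first, then the cpus of each
	 * remaining node in the span.
	 */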

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask)) {
			sched_group_nodes[i] = NULL;
			continue;
		}

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;

			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			/* sg may be NULL if the allocation above failed */
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			"Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		/* Close the ring: the last group points back at the first */
		prev->next = sched_group_nodes[i];
	}
#endif
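
	/*
	 * cpu_power is the load balancer's notion of a group's capacity,
	 * in units of SCHED_LOAD_SCALE.
	 */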

	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		/* One package: full scale plus ~10% per additional cpu */
		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}
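
	/*
	 * A node group's power is the sum of the power of each distinct
	 * physical package in its span, counted once.
	 */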

#ifdef CONFIG_NUMA
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_cpu_mask(i, *cpu_map) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Set up the mask of cpus without special case scheduling
	 * requirements. For now this just excludes isolated cpus, but could
	 * be used to exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}
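
/*
 * Tear down the per-node sched_group lists allocated in
 * build_sched_domains().
 */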
void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;
	int cpu;

	for_each_cpu_mask(cpu, *cpu_map) {
		struct sched_group *sched_group_allnodes
			= sched_group_allnodes_bycpu[cpu];
		struct sched_group **sched_group_nodes
			= sched_group_nodes_bycpu[cpu];

		if (sched_group_allnodes) {
			kfree(sched_group_allnodes);
			sched_group_allnodes_bycpu[cpu] = NULL;
		}

		if (!sched_group_nodes)
			continue;

		for (i = 0; i < MAX_NUMNODES; i++) {
			cpumask_t nodemask = node_to_cpumask(i);
			struct sched_group *oldsg, *sg = sched_group_nodes[i];

			cpus_and(nodemask, nodemask, *cpu_map);
			if (cpus_empty(nodemask))
				continue;

			if (sg == NULL)
				continue;
			sg = sg->next;
next_sg:
			oldsg = sg;
			sg = sg->next;
			kfree(oldsg);
			/* Stop once we are back at the head of the ring */
			if (oldsg != sched_group_nodes[i])
				goto next_sg;
		}
		kfree(sched_group_nodes);
		sched_group_nodes_bycpu[cpu] = NULL;
	}
#endif
}