Merge branch 'perf/urgent' into perf/core
[firefly-linux-kernel-4.4.55.git] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *      Remote softirq infrastructure is by Jens Axboe.
11  */
12
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
27
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/irq.h>
30
31 #include <asm/irq.h>
32 /*
33    - No shared variables, all the data are CPU local.
34    - If a softirq needs serialization, let it serialize itself
35      by its own spinlocks.
36    - Even if softirq is serialized, only local cpu is marked for
37      execution. Hence, we get something sort of weak cpu binding.
38      Though it is still not clear, will it result in better locality
39      or will not.
40
41    Examples:
42    - NET RX softirq. It is multithreaded and does not require
43      any global serialization.
44    - NET TX softirq. It kicks software netdevice queues, hence
45      it is logically serialized per device, but this serialization
46      is invisible to common code.
47    - Tasklets: serialized wrt itself.
48  */
49
#ifndef __ARCH_IRQ_STAT
/* Generic per-CPU interrupt statistics for arches without their own. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

/* Softirq action table, indexed by softirq number (HI_SOFTIRQ, ...). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/* Per-CPU ksoftirqd thread; created/woken from cpu_callback() below. */
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/* Human-readable softirq names; same indexing as softirq_vec. */
char *softirq_to_name[NR_SOFTIRQS] = {
        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
        "TASKLET", "SCHED", "HRTIMER", "RCU"
};
63
64 /*
65  * we cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst case 1/HZ latency
67  * to the pending events, so lets the scheduler to balance
68  * the softirq load for us.
69  */
/*
 * Wake this CPU's ksoftirqd thread unless it is already running or
 * runnable.  Caller must have interrupts disabled.
 */
static void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __this_cpu_read(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}
78
79 /*
80  * preempt_count and SOFTIRQ_OFFSET usage:
81  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
82  *   softirq processing.
83  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
84  *   on local_bh_disable or local_bh_enable.
85  * This lets us distinguish between whether we are currently processing
86  * softirq and whether we just have bh disabled.
87  */
88
89 /*
90  * This one is for softirq.c-internal use,
91  * where hardirqs are disabled legitimately:
92  */
#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * Raise preempt_count by @cnt to block softirq processing, recording
 * the event for irq-flag tracing.  @ip is the caller address reported
 * to the tracer.  Must not be called from hard interrupt context.
 */
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        /*
         * The preempt tracer hooks into add_preempt_count and will break
         * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
         * is set and before current->softirq_enabled is cleared.
         * We must manually increment preempt_count here and manually
         * call the trace_preempt_off later.
         */
        preempt_count() += cnt;
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == cnt)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);

        /* Only the very first disable transition is traced. */
        if (preempt_count() == cnt)
                trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
/* Without irq-flag tracing a plain preempt-count bump is sufficient. */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
        add_preempt_count(cnt);
        barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
126
/*
 * Disable bottom-half (softirq) processing on this CPU.  Uses the
 * caller's return address as the tracing IP.
 */
void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(local_bh_disable);
134
/*
 * Drop @cnt from preempt_count, tracing the softirqs-on transition if
 * this removes the last softirq-disable.  Caller must be outside hard
 * interrupt context and have interrupts disabled.
 */
static void __local_bh_enable(unsigned int cnt)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == cnt)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(cnt);
}
144
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
/*
 * Re-enable softirqs WITHOUT running any that are pending (contrast
 * with local_bh_enable()).
 */
void _local_bh_enable(void)
{
        __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);
156
/*
 * Common body of local_bh_enable()/local_bh_enable_ip(): drop the
 * softirq-disable count and run any pending softirqs if we just became
 * eligible to process them.  @ip is the address reported to tracing.
 */
static inline void _local_bh_enable_ip(unsigned long ip)
{
        WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_disable();
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        /* Drop the remaining count that kept preemption off above. */
        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_enable();
#endif
        preempt_check_resched();
}
183
/* Re-enable softirqs and process any that are pending. */
void local_bh_enable(void)
{
        _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);
189
/* As local_bh_enable(), but with an explicit tracing IP from the caller. */
void local_bh_enable_ip(unsigned long ip)
{
        _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
195
196 /*
197  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
198  * and we fall back to softirqd after that.
199  *
200  * This number has been established via experimentation.
201  * The two things to balance is latency against fairness -
202  * we want to handle softirqs as soon as possible, but they
203  * should not be able to lock up the box.
204  */
205 #define MAX_SOFTIRQ_RESTART 10
206
207 asmlinkage void __do_softirq(void)
208 {
209         struct softirq_action *h;
210         __u32 pending;
211         int max_restart = MAX_SOFTIRQ_RESTART;
212         int cpu;
213
214         pending = local_softirq_pending();
215         account_system_vtime(current);
216
217         __local_bh_disable((unsigned long)__builtin_return_address(0),
218                                 SOFTIRQ_OFFSET);
219         lockdep_softirq_enter();
220
221         cpu = smp_processor_id();
222 restart:
223         /* Reset the pending bitmask before enabling irqs */
224         set_softirq_pending(0);
225
226         local_irq_enable();
227
228         h = softirq_vec;
229
230         do {
231                 if (pending & 1) {
232                         unsigned int vec_nr = h - softirq_vec;
233                         int prev_count = preempt_count();
234
235                         kstat_incr_softirqs_this_cpu(vec_nr);
236
237                         trace_softirq_entry(vec_nr);
238                         h->action(h);
239                         trace_softirq_exit(vec_nr);
240                         if (unlikely(prev_count != preempt_count())) {
241                                 printk(KERN_ERR "huh, entered softirq %u %s %p"
242                                        "with preempt_count %08x,"
243                                        " exited with %08x?\n", vec_nr,
244                                        softirq_to_name[vec_nr], h->action,
245                                        prev_count, preempt_count());
246                                 preempt_count() = prev_count;
247                         }
248
249                         rcu_bh_qs(cpu);
250                 }
251                 h++;
252                 pending >>= 1;
253         } while (pending);
254
255         local_irq_disable();
256
257         pending = local_softirq_pending();
258         if (pending && --max_restart)
259                 goto restart;
260
261         if (pending)
262                 wakeup_softirqd();
263
264         lockdep_softirq_exit();
265
266         account_system_vtime(current);
267         __local_bh_enable(SOFTIRQ_OFFSET);
268 }
269
#ifndef __ARCH_HAS_DO_SOFTIRQ

/*
 * Generic softirq entry point for arches without their own (e.g. one
 * that switches to a dedicated irq stack).  Safe to call from process
 * context; does nothing if we are already inside interrupt/softirq
 * processing or if nothing is pending.
 */
asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

#endif
291
292 /*
293  * Enter an interrupt context.
294  */
/*
 * Enter an interrupt context: tell RCU, give the idle CPU a chance to
 * catch up its timekeeping, then account the hardirq nesting.
 */
void irq_enter(void)
{
        int cpu = smp_processor_id();

        rcu_irq_enter();
        if (idle_cpu(cpu) && !in_interrupt()) {
                /*
                 * Prevent raise_softirq from needlessly waking up ksoftirqd
                 * here, as softirq will be serviced on return from interrupt.
                 */
                local_bh_disable();
                tick_check_idle(cpu);
                _local_bh_enable();
        }

        __irq_enter();
}
312
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
/*
 * Run pending softirqs on irq exit.  On arches that exit irqs with
 * interrupts already disabled we can call __do_softirq() directly.
 * With forced irq threading the work is punted to ksoftirqd instead;
 * the bh disable/enable pair around the wakeup keeps the accounting
 * consistent without processing softirqs inline.
 */
static inline void invoke_softirq(void)
{
        if (!force_irqthreads)
                __do_softirq();
        else {
                __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
                wakeup_softirqd();
                __local_bh_enable(SOFTIRQ_OFFSET);
        }
}
#else
/* Interrupts may be enabled here, so go through do_softirq(). */
static inline void invoke_softirq(void)
{
        if (!force_irqthreads)
                do_softirq();
        else {
                __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
                wakeup_softirqd();
                __local_bh_enable(SOFTIRQ_OFFSET);
        }
}
#endif
338
339 /*
340  * Exit an interrupt context. Process softirqs if needed and possible:
341  */
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        /* Only run softirqs from the outermost irq, not nested ones. */
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();

#ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
                tick_nohz_irq_exit();
#endif
        rcu_irq_exit();
        preempt_enable_no_resched();
}
358
/*
 * Mark softirq @nr pending on this CPU and, if needed, wake ksoftirqd.
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}
378
/* Irq-safe wrapper: disable interrupts around raise_softirq_irqoff(). */
void raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}
387
/*
 * Set the pending bit for softirq @nr without waking ksoftirqd.
 * Caller must have irqs disabled.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
        trace_softirq_raise(nr);
        or_softirq_pending(1UL << nr);
}
393
/* Register @action as the handler for softirq number @nr. */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
        softirq_vec[nr].action = action;
}
398
399 /*
400  * Tasklets
401  */
/*
 * Singly-linked tasklet queue; tail points at the last node's next
 * pointer (or at head when empty) for O(1) append.
 */
struct tasklet_head
{
        struct tasklet_struct *head;
        struct tasklet_struct **tail;
};

/* Per-CPU queues for the normal and high-priority tasklet softirqs. */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
410
/*
 * Append @t to this CPU's tasklet queue and raise TASKLET_SOFTIRQ.
 * Called by tasklet_schedule() after it wins the SCHED bit.
 */
void __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__this_cpu_read(tasklet_vec.tail) = t;
        __this_cpu_write(tasklet_vec.tail, &(t->next));
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);
424
/*
 * Append @t to this CPU's high-priority tasklet queue and raise
 * HI_SOFTIRQ.
 */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__this_cpu_read(tasklet_hi_vec.tail) = t;
        __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
438
/*
 * Push @t onto the FRONT of this CPU's high-priority tasklet queue so
 * it runs before anything already queued.  Caller must have irqs
 * disabled.  Does not wake ksoftirqd (uses __raise_softirq_irqoff).
 */
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
        BUG_ON(!irqs_disabled());

        t->next = __this_cpu_read(tasklet_hi_vec.head);
        __this_cpu_write(tasklet_hi_vec.head, t);
        __raise_softirq_irqoff(HI_SOFTIRQ);
}

EXPORT_SYMBOL(__tasklet_hi_schedule_first);
449
/*
 * TASKLET_SOFTIRQ handler: atomically detach this CPU's tasklet list,
 * then run every tasklet whose RUN lock we can take and whose disable
 * count is zero.  Tasklets we cannot run now are re-queued and the
 * softirq raised again.
 */
static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        /* Detach the whole pending list with irqs off. */
        local_irq_disable();
        list = __this_cpu_read(tasklet_vec.head);
        __this_cpu_write(tasklet_vec.head, NULL);
        __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                /* SCHED must be set for anything on the list. */
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                /* Running elsewhere or disabled: re-queue for a later pass. */
                local_irq_disable();
                t->next = NULL;
                *__this_cpu_read(tasklet_vec.tail) = t;
                __this_cpu_write(tasklet_vec.tail, &(t->next));
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}
484
/*
 * HI_SOFTIRQ handler: identical to tasklet_action() but drains the
 * high-priority tasklet queue.
 */
static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        /* Detach the whole pending list with irqs off. */
        local_irq_disable();
        list = __this_cpu_read(tasklet_hi_vec.head);
        __this_cpu_write(tasklet_hi_vec.head, NULL);
        __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                /* SCHED must be set for anything on the list. */
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                /* Running elsewhere or disabled: re-queue for a later pass. */
                local_irq_disable();
                t->next = NULL;
                *__this_cpu_read(tasklet_hi_vec.tail) = t;
                __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}
519
520
521 void tasklet_init(struct tasklet_struct *t,
522                   void (*func)(unsigned long), unsigned long data)
523 {
524         t->next = NULL;
525         t->state = 0;
526         atomic_set(&t->count, 0);
527         t->func = func;
528         t->data = data;
529 }
530
531 EXPORT_SYMBOL(tasklet_init);
532
533 void tasklet_kill(struct tasklet_struct *t)
534 {
535         if (in_interrupt())
536                 printk("Attempt to kill tasklet from interrupt\n");
537
538         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
539                 do {
540                         yield();
541                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
542         }
543         tasklet_unlock_wait(t);
544         clear_bit(TASKLET_STATE_SCHED, &t->state);
545 }
546
547 EXPORT_SYMBOL(tasklet_kill);
548
549 /*
550  * tasklet_hrtimer
551  */
552
553 /*
554  * The trampoline is called when the hrtimer expires. It schedules a tasklet
555  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
556  * hrtimer callback, but from softirq context.
557  */
/*
 * hrtimer expiry callback: defer the user's handler to softirq context
 * by scheduling the companion high-priority tasklet.  Always returns
 * HRTIMER_NORESTART; the tasklet re-arms the timer if needed.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
        struct tasklet_hrtimer *ttimer =
                container_of(timer, struct tasklet_hrtimer, timer);

        tasklet_hi_schedule(&ttimer->tasklet);
        return HRTIMER_NORESTART;
}
566
567 /*
568  * Helper function which calls the hrtimer callback from
569  * tasklet/softirq context
570  */
/*
 * Tasklet body: invoke the user's hrtimer callback from softirq
 * context and restart the timer if the callback asks for it.
 * @data is the tasklet_hrtimer, cast by tasklet_hrtimer_init().
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
        struct tasklet_hrtimer *ttimer = (void *)data;
        enum hrtimer_restart restart;

        restart = ttimer->function(&ttimer->timer);
        if (restart != HRTIMER_NORESTART)
                hrtimer_restart(&ttimer->timer);
}
580
581 /**
582  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
583  * @ttimer:      tasklet_hrtimer which is initialized
584  * @function:    hrtimer callback function which gets called from softirq context
585  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
586  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
587  */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
                          enum hrtimer_restart (*function)(struct hrtimer *),
                          clockid_t which_clock, enum hrtimer_mode mode)
{
        /* Wire both trampolines; @function runs from tasklet context. */
        hrtimer_init(&ttimer->timer, which_clock, mode);
        ttimer->timer.function = __hrtimer_tasklet_trampoline;
        tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
                     (unsigned long)ttimer);
        ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
599
600 /*
601  * Remote softirq bits
602  */
603
/* Per-CPU, per-softirq lists of call_single_data queued remotely. */
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);
606
/*
 * Queue @cp on this CPU's work list for @softirq.  Caller must have
 * irqs disabled.
 */
static void __local_trigger(struct call_single_data *cp, int softirq)
{
        struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

        list_add_tail(&cp->list, head);

        /* Trigger the softirq only if the list was previously empty.  */
        if (head->next == &cp->list)
                raise_softirq_irqoff(softirq);
}
617
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * IPI callback on the remote CPU: queue the work locally there.
 * @data is the call_single_data sent by __try_remote_softirq(), with
 * the softirq number stashed in ->priv.
 */
static void remote_softirq_receive(void *data)
{
        struct call_single_data *cp = data;
        unsigned long flags;
        int softirq;

        softirq = cp->priv;

        local_irq_save(flags);
        __local_trigger(cp, softirq);
        local_irq_restore(flags);
}

/*
 * Try to ship @cp to @cpu via a non-waiting smp call.  Returns 0 on
 * success, 1 if @cpu is offline (caller then queues locally).
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        if (cpu_online(cpu)) {
                cp->func = remote_softirq_receive;
                cp->info = cp;
                cp->flags = 0;
                cp->priv = softirq;

                __smp_call_function_single(cpu, cp, 0);
                return 0;
        }
        return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* No generic smp helpers: always fall back to local queuing. */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        return 1;
}
#endif
651
652 /**
653  * __send_remote_softirq - try to schedule softirq work on a remote cpu
654  * @cp: private SMP call function data area
655  * @cpu: the remote cpu
656  * @this_cpu: the currently executing cpu
657  * @softirq: the softirq for the work
658  *
659  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
660  * done, the work is instead queued up on the local cpu.
661  *
662  * Interrupts must be disabled.
663  */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
        /* Fall back to the local queue if the target is us or offline. */
        if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
                __local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
670
671 /**
672  * send_remote_softirq - try to schedule softirq work on a remote cpu
673  * @cp: private SMP call function data area
674  * @cpu: the remote cpu
675  * @softirq: the softirq for the work
676  *
677  * Like __send_remote_softirq except that disabling interrupts and
678  * computing the current cpu is done for the caller.
679  */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        unsigned long flags;
        int this_cpu;

        /* Irqs off also pins us to this_cpu for the duration. */
        local_irq_save(flags);
        this_cpu = smp_processor_id();
        __send_remote_softirq(cp, cpu, this_cpu, softirq);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
691
/*
 * CPU hotplug callback for the remote-softirq work lists: when a CPU
 * dies, adopt its queued work on the CPU running the notifier.
 */
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
                                               unsigned long action, void *hcpu)
{
        /*
         * If a CPU goes away, splice its entries to the current CPU
         * and trigger a run of the softirq
         */
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
                int cpu = (unsigned long) hcpu;
                int i;

                local_irq_disable();
                for (i = 0; i < NR_SOFTIRQS; i++) {
                        struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
                        struct list_head *local_head;

                        if (list_empty(head))
                                continue;

                        local_head = &__get_cpu_var(softirq_work_list[i]);
                        list_splice_init(head, local_head);
                        raise_softirq_irqoff(i);
                }
                local_irq_enable();
        }

        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
        .notifier_call  = remote_softirq_cpu_notify,
};
724
/*
 * Boot-time setup: initialize every CPU's tasklet queues and remote
 * softirq work lists, register the hotplug notifier, and install the
 * two tasklet softirq handlers.
 */
void __init softirq_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                int i;

                /* Empty queue: tail points back at head. */
                per_cpu(tasklet_vec, cpu).tail =
                        &per_cpu(tasklet_vec, cpu).head;
                per_cpu(tasklet_hi_vec, cpu).tail =
                        &per_cpu(tasklet_hi_vec, cpu).head;
                for (i = 0; i < NR_SOFTIRQS; i++)
                        INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
        }

        register_hotcpu_notifier(&remote_softirq_cpu_notifier);

        open_softirq(TASKLET_SOFTIRQ, tasklet_action);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
745
/*
 * Per-CPU ksoftirqd thread body.  Sleeps until softirqs are pending,
 * then drains them via __do_softirq() with preemption disabled (which
 * also pins the thread and keeps the CPU from going offline under us).
 * @__bind_cpu is the CPU this thread was bound to at creation.
 */
static int run_ksoftirqd(void * __bind_cpu)
{
        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        /* Nothing to do: sleep until woken. */
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        local_irq_disable();
                        if (local_softirq_pending())
                                __do_softirq();
                        local_irq_enable();
                        /* Be fair to other tasks between batches. */
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                        rcu_note_context_switch((long)__bind_cpu);
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}
792
793 #ifdef CONFIG_HOTPLUG_CPU
794 /*
795  * tasklet_kill_immediate is called to remove a tasklet which can already be
796  * scheduled for execution on @cpu.
797  *
798  * Unlike tasklet_kill, this function removes the tasklet
799  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
800  *
801  * When this function is called, @cpu must be in the CPU_DEAD state.
802  */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        /* Not scheduled, so it cannot be on the dead CPU's list. */
        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        /* If this was the tail element, move the tail ptr */
                        if (*i == NULL)
                                per_cpu(tasklet_vec, cpu).tail = i;
                        return;
                }
        }
        /* SCHED was set but the tasklet was not on the list: broken state. */
        BUG();
}
825
826 static void takeover_tasklets(unsigned int cpu)
827 {
828         /* CPU is dead, so no lock needed. */
829         local_irq_disable();
830
831         /* Find end, append list for that CPU. */
832         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
833                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
834                 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
835                 per_cpu(tasklet_vec, cpu).head = NULL;
836                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
837         }
838         raise_softirq_irqoff(TASKLET_SOFTIRQ);
839
840         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
841                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
842                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
843                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
844                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
845         }
846         raise_softirq_irqoff(HI_SOFTIRQ);
847
848         local_irq_enable();
849 }
850 #endif /* CONFIG_HOTPLUG_CPU */
851
/*
 * CPU hotplug callback managing the per-CPU ksoftirqd thread:
 * create+bind it on UP_PREPARE, wake it on ONLINE, and on cancel/death
 * boost it to SCHED_FIFO so it can drain, stop it, and adopt the dead
 * CPU's tasklets.
 */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                p = kthread_create_on_node(run_ksoftirqd,
                                           hcpu,
                                           cpu_to_node(hotcpu),
                                           "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return notifier_from_errno(PTR_ERR(p));
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             cpumask_any(cpu_online_mask));
        case CPU_DEAD:
        case CPU_DEAD_FROZEN: {
                static const struct sched_param param = {
                        .sched_priority = MAX_RT_PRIO-1
                };

                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                /* Max RT priority so the dying thread gets CPU time to exit. */
                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
        }
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};
906
/*
 * Early init: spawn ksoftirqd for the boot CPU by replaying the
 * hotplug callbacks by hand, then register the notifier so later CPUs
 * get theirs automatically.
 */
static __init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

        BUG_ON(err != NOTIFY_OK);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}
early_initcall(spawn_ksoftirqd);
918
919 /*
920  * [ These __weak aliases are kept in a separate compilation unit, so that
921  *   GCC does not inline them incorrectly. ]
922  */
923
/* Default no-op; arch code may override this __weak stub. */
int __init __weak early_irq_init(void)
{
        return 0;
}

#ifdef CONFIG_GENERIC_HARDIRQS
/* Default irq count probe; arch code may override this __weak stub. */
int __init __weak arch_probe_nr_irqs(void)
{
        return NR_IRQS_LEGACY;
}

/* Default no-op; arch code may override this __weak stub. */
int __init __weak arch_early_irq_init(void)
{
        return 0;
}
#endif