softirq: Reduce invoke_softirq() code duplication
kernel/softirq.c (from firefly-linux-kernel-4.4.55.git)
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *      Remote softirq infrastructure is by Jens Axboe.
11  */
12
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
27
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/irq.h>
30
31 #include <asm/irq.h>
32 /*
33    - No shared variables, all the data are CPU local.
34    - If a softirq needs serialization, let it serialize itself
35      by its own spinlocks.
36    - Even if softirq is serialized, only the local CPU is marked for
37      execution. Hence, we get a sort of weak CPU binding, though it
38      is still not clear whether this results in better locality
39      or not.
40
41    Examples:
42    - NET RX softirq. It is multithreaded and does not require
43      any global serialization.
44    - NET TX softirq. It kicks software netdevice queues, hence
45      it is logically serialized per device, but this serialization
46      is invisible to common code.
47    - Tasklets: serialized wrt itself.
48  */
49
50 #ifndef __ARCH_IRQ_STAT
51 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
52 EXPORT_SYMBOL(irq_stat);
53 #endif
54
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56
57 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58
59 char *softirq_to_name[NR_SOFTIRQS] = {
60         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61         "TASKLET", "SCHED", "HRTIMER", "RCU"
62 };
63
64 /*
65  * We cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst-case 1/HZ latency
67  * to the pending events, so let the scheduler balance the
68  * softirq load for us.
69  */
70 static void wakeup_softirqd(void)
71 {
72         /* Interrupts are disabled: no need to stop preemption */
73         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
74
75         if (tsk && tsk->state != TASK_RUNNING)
76                 wake_up_process(tsk);
77 }
78
79 /*
80  * preempt_count and SOFTIRQ_OFFSET usage:
81  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
82  *   softirq processing.
83  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
84  *   on local_bh_disable or local_bh_enable.
85  * This lets us distinguish between whether we are currently processing
86  * softirq and whether we just have bh disabled.
87  */
88
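/*
 * Illustrative sketch, not part of the original file: how the two offsets
 * look from the outside.  in_softirq() tests the whole softirq field of
 * preempt_count, so it is true both while a handler runs and while bh is
 * merely disabled; in_serving_softirq() tests only the SOFTIRQ_OFFSET bit
 * that __do_softirq() adds, so it is true only while a handler is actually
 * running.  The function name below is hypothetical.
 */
static void __maybe_unused softirq_count_sketch(void)
{
        local_bh_disable();             /* adds SOFTIRQ_DISABLE_OFFSET        */
        WARN_ON(!in_softirq());         /* bh section counts as "softirq"     */
        WARN_ON(in_serving_softirq());  /* but no handler is actually running */
        local_bh_enable();
}
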
89 /*
90  * This one is for softirq.c-internal use,
91  * where hardirqs are disabled legitimately:
92  */
93 #ifdef CONFIG_TRACE_IRQFLAGS
94 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
95 {
96         unsigned long flags;
97
98         WARN_ON_ONCE(in_irq());
99
100         raw_local_irq_save(flags);
101         /*
102          * The preempt tracer hooks into add_preempt_count and will break
103          * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
104          * is set and before current->softirq_enabled is cleared.
105          * We must manually increment preempt_count here and manually
106          * call the trace_preempt_off later.
107          */
108         preempt_count() += cnt;
109         /*
110          * Were softirqs turned off above:
111          */
112         if (softirq_count() == cnt)
113                 trace_softirqs_off(ip);
114         raw_local_irq_restore(flags);
115
116         if (preempt_count() == cnt)
117                 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
118 }
119 #else /* !CONFIG_TRACE_IRQFLAGS */
120 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
121 {
122         add_preempt_count(cnt);
123         barrier();
124 }
125 #endif /* CONFIG_TRACE_IRQFLAGS */
126
127 void local_bh_disable(void)
128 {
129         __local_bh_disable((unsigned long)__builtin_return_address(0),
130                                 SOFTIRQ_DISABLE_OFFSET);
131 }
132
133 EXPORT_SYMBOL(local_bh_disable);
134
135 static void __local_bh_enable(unsigned int cnt)
136 {
137         WARN_ON_ONCE(in_irq());
138         WARN_ON_ONCE(!irqs_disabled());
139
140         if (softirq_count() == cnt)
141                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
142         sub_preempt_count(cnt);
143 }
144
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
150 void _local_bh_enable(void)
151 {
152         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
153 }
154
155 EXPORT_SYMBOL(_local_bh_enable);
156
157 static inline void _local_bh_enable_ip(unsigned long ip)
158 {
159         WARN_ON_ONCE(in_irq() || irqs_disabled());
160 #ifdef CONFIG_TRACE_IRQFLAGS
161         local_irq_disable();
162 #endif
163         /*
164          * Are softirqs going to be turned on now:
165          */
166         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
167                 trace_softirqs_on(ip);
168         /*
169          * Keep preemption disabled until we are done with
170          * softirq processing:
171          */
172         sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
173
174         if (unlikely(!in_interrupt() && local_softirq_pending()))
175                 do_softirq();
176
177         dec_preempt_count();
178 #ifdef CONFIG_TRACE_IRQFLAGS
179         local_irq_enable();
180 #endif
181         preempt_check_resched();
182 }
183
184 void local_bh_enable(void)
185 {
186         _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
187 }
188 EXPORT_SYMBOL(local_bh_enable);
189
190 void local_bh_enable_ip(unsigned long ip)
191 {
192         _local_bh_enable_ip(ip);
193 }
194 EXPORT_SYMBOL(local_bh_enable_ip);
195
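/*
 * Illustrative sketch, not part of the original file: the usual way code
 * uses the pair above to protect data it shares with softirq context
 * (tasklets, timers, NET_RX, ...) on the local CPU.  The per-CPU variable
 * and function names are hypothetical.
 */
static DEFINE_PER_CPU(unsigned long, example_stat);

static void __maybe_unused example_bump_stat(void)
{
        /*
         * No softirq can run on this CPU between the two calls;
         * local_bh_enable() will process anything that became
         * pending in the meantime.
         */
        local_bh_disable();
        __this_cpu_inc(example_stat);
        local_bh_enable();
}
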
196 /*
197  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
198  * and we fall back to ksoftirqd after that.
199  *
200  * This number has been established via experimentation.
201  * The two things to balance are latency and fairness -
202  * we want to handle softirqs as soon as possible, but they
203  * should not be able to lock up the box.
204  */
205 #define MAX_SOFTIRQ_RESTART 10
206
207 asmlinkage void __do_softirq(void)
208 {
209         struct softirq_action *h;
210         __u32 pending;
211         int max_restart = MAX_SOFTIRQ_RESTART;
212         int cpu;
213
214         pending = local_softirq_pending();
215         account_system_vtime(current);
216
217         __local_bh_disable((unsigned long)__builtin_return_address(0),
218                                 SOFTIRQ_OFFSET);
219         lockdep_softirq_enter();
220
221         cpu = smp_processor_id();
222 restart:
223         /* Reset the pending bitmask before enabling irqs */
224         set_softirq_pending(0);
225
226         local_irq_enable();
227
228         h = softirq_vec;
229
230         do {
231                 if (pending & 1) {
232                         unsigned int vec_nr = h - softirq_vec;
233                         int prev_count = preempt_count();
234
235                         kstat_incr_softirqs_this_cpu(vec_nr);
236
237                         trace_softirq_entry(vec_nr);
238                         h->action(h);
239                         trace_softirq_exit(vec_nr);
240                         if (unlikely(prev_count != preempt_count())) {
241                                 printk(KERN_ERR "huh, entered softirq %u %s %p"
242                                        " with preempt_count %08x,"
243                                        " exited with %08x?\n", vec_nr,
244                                        softirq_to_name[vec_nr], h->action,
245                                        prev_count, preempt_count());
246                                 preempt_count() = prev_count;
247                         }
248
249                         rcu_bh_qs(cpu);
250                 }
251                 h++;
252                 pending >>= 1;
253         } while (pending);
254
255         local_irq_disable();
256
257         pending = local_softirq_pending();
258         if (pending && --max_restart)
259                 goto restart;
260
261         if (pending)
262                 wakeup_softirqd();
263
264         lockdep_softirq_exit();
265
266         account_system_vtime(current);
267         __local_bh_enable(SOFTIRQ_OFFSET);
268 }
269
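/*
 * Illustrative sketch, not part of the original file: the loop in
 * __do_softirq() walks the pending word one bit at a time and recovers the
 * vector number from (h - softirq_vec).  A more explicit walk over the same
 * mask, shown only to make that arithmetic obvious (hypothetical helper):
 */
static void __maybe_unused example_walk_pending(__u32 pending)
{
        unsigned int vec_nr;

        for (vec_nr = 0; vec_nr < NR_SOFTIRQS && pending; vec_nr++, pending >>= 1) {
                if (pending & 1)
                        printk(KERN_DEBUG "softirq %u (%s) is pending\n",
                               vec_nr, softirq_to_name[vec_nr]);
        }
}
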
270 #ifndef __ARCH_HAS_DO_SOFTIRQ
271
272 asmlinkage void do_softirq(void)
273 {
274         __u32 pending;
275         unsigned long flags;
276
277         if (in_interrupt())
278                 return;
279
280         local_irq_save(flags);
281
282         pending = local_softirq_pending();
283
284         if (pending)
285                 __do_softirq();
286
287         local_irq_restore(flags);
288 }
289
290 #endif
291
292 /*
293  * Enter an interrupt context.
294  */
295 void irq_enter(void)
296 {
297         int cpu = smp_processor_id();
298
299         rcu_irq_enter();
300         if (idle_cpu(cpu) && !in_interrupt()) {
301                 /*
302                  * Prevent raise_softirq from needlessly waking up ksoftirqd
303                  * here, as softirq will be serviced on return from interrupt.
304                  */
305                 local_bh_disable();
306                 tick_check_idle(cpu);
307                 _local_bh_enable();
308         }
309
310         __irq_enter();
311 }
312
313 static inline void invoke_softirq(void)
314 {
315         if (!force_irqthreads) {
316 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
317                 __do_softirq();
318 #else
319                 do_softirq();
320 #endif
321         } else {
322                 __local_bh_disable((unsigned long)__builtin_return_address(0),
323                                 SOFTIRQ_OFFSET);
324                 wakeup_softirqd();
325                 __local_bh_enable(SOFTIRQ_OFFSET);
326         }
327 }
328
329 /*
330  * Exit an interrupt context. Process softirqs if needed and possible:
331  */
332 void irq_exit(void)
333 {
334         account_system_vtime(current);
335         trace_hardirq_exit();
336         sub_preempt_count(IRQ_EXIT_OFFSET);
337         if (!in_interrupt() && local_softirq_pending())
338                 invoke_softirq();
339
340 #ifdef CONFIG_NO_HZ
341         /* Make sure that timer wheel updates are propagated */
342         if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
343                 tick_nohz_irq_exit();
344 #endif
345         rcu_irq_exit();
346         preempt_enable_no_resched();
347 }
348
349 /*
350  * This function must run with irqs disabled!
351  */
352 inline void raise_softirq_irqoff(unsigned int nr)
353 {
354         __raise_softirq_irqoff(nr);
355
356         /*
357          * If we're in an interrupt or softirq, we're done
358          * (this also catches softirq-disabled code). We will
359          * actually run the softirq once we return from
360          * the irq or softirq.
361          *
362          * Otherwise we wake up ksoftirqd to make sure we
363          * schedule the softirq soon.
364          */
365         if (!in_interrupt())
366                 wakeup_softirqd();
367 }
368
369 void raise_softirq(unsigned int nr)
370 {
371         unsigned long flags;
372
373         local_irq_save(flags);
374         raise_softirq_irqoff(nr);
375         local_irq_restore(flags);
376 }
377
378 void open_softirq(int nr, void (*action)(struct softirq_action *))
379 {
380         softirq_vec[nr].action = action;
381 }
382
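/*
 * Illustrative sketch, not part of the original file: how a subsystem uses
 * the two entry points above.  Softirq slots are the fixed NR_SOFTIRQS enum
 * in linux/interrupt.h, so EXAMPLE_SOFTIRQ is hypothetical and would have
 * to be added to that enum; the handler name is hypothetical too.  Real
 * users (net/core/dev.c, block/blk-softirq.c, and the tasklet code in this
 * file) follow the same shape: register once at init time, then raise the
 * softirq when there is work, usually from hard interrupt context.
 */
static void example_softirq_action(struct softirq_action *h)
{
        /* Runs in softirq context, interrupts enabled, on the raising CPU. */
}

static void __maybe_unused example_softirq_usage(void)
{
        open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);

        local_irq_disable();
        raise_softirq_irqoff(EXAMPLE_SOFTIRQ); /* when irqs are already off  */
        local_irq_enable();

        raise_softirq(EXAMPLE_SOFTIRQ);        /* saves/restores flags itself */
}
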
383 /*
384  * Tasklets
385  */
386 struct tasklet_head
387 {
388         struct tasklet_struct *head;
389         struct tasklet_struct **tail;
390 };
391
392 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
393 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
394
395 void __tasklet_schedule(struct tasklet_struct *t)
396 {
397         unsigned long flags;
398
399         local_irq_save(flags);
400         t->next = NULL;
401         *__this_cpu_read(tasklet_vec.tail) = t;
402         __this_cpu_write(tasklet_vec.tail, &(t->next));
403         raise_softirq_irqoff(TASKLET_SOFTIRQ);
404         local_irq_restore(flags);
405 }
406
407 EXPORT_SYMBOL(__tasklet_schedule);
408
409 void __tasklet_hi_schedule(struct tasklet_struct *t)
410 {
411         unsigned long flags;
412
413         local_irq_save(flags);
414         t->next = NULL;
415         *__this_cpu_read(tasklet_hi_vec.tail) = t;
416         __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
417         raise_softirq_irqoff(HI_SOFTIRQ);
418         local_irq_restore(flags);
419 }
420
421 EXPORT_SYMBOL(__tasklet_hi_schedule);
422
423 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
424 {
425         BUG_ON(!irqs_disabled());
426
427         t->next = __this_cpu_read(tasklet_hi_vec.head);
428         __this_cpu_write(tasklet_hi_vec.head, t);
429         __raise_softirq_irqoff(HI_SOFTIRQ);
430 }
431
432 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
433
434 static void tasklet_action(struct softirq_action *a)
435 {
436         struct tasklet_struct *list;
437
438         local_irq_disable();
439         list = __this_cpu_read(tasklet_vec.head);
440         __this_cpu_write(tasklet_vec.head, NULL);
441         __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
442         local_irq_enable();
443
444         while (list) {
445                 struct tasklet_struct *t = list;
446
447                 list = list->next;
448
449                 if (tasklet_trylock(t)) {
450                         if (!atomic_read(&t->count)) {
451                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
452                                         BUG();
453                                 t->func(t->data);
454                                 tasklet_unlock(t);
455                                 continue;
456                         }
457                         tasklet_unlock(t);
458                 }
459
460                 local_irq_disable();
461                 t->next = NULL;
462                 *__this_cpu_read(tasklet_vec.tail) = t;
463                 __this_cpu_write(tasklet_vec.tail, &(t->next));
464                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
465                 local_irq_enable();
466         }
467 }
468
469 static void tasklet_hi_action(struct softirq_action *a)
470 {
471         struct tasklet_struct *list;
472
473         local_irq_disable();
474         list = __this_cpu_read(tasklet_hi_vec.head);
475         __this_cpu_write(tasklet_hi_vec.head, NULL);
476         __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
477         local_irq_enable();
478
479         while (list) {
480                 struct tasklet_struct *t = list;
481
482                 list = list->next;
483
484                 if (tasklet_trylock(t)) {
485                         if (!atomic_read(&t->count)) {
486                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
487                                         BUG();
488                                 t->func(t->data);
489                                 tasklet_unlock(t);
490                                 continue;
491                         }
492                         tasklet_unlock(t);
493                 }
494
495                 local_irq_disable();
496                 t->next = NULL;
497                 *__this_cpu_read(tasklet_hi_vec.tail) = t;
498                 __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
499                 __raise_softirq_irqoff(HI_SOFTIRQ);
500                 local_irq_enable();
501         }
502 }
503
504
505 void tasklet_init(struct tasklet_struct *t,
506                   void (*func)(unsigned long), unsigned long data)
507 {
508         t->next = NULL;
509         t->state = 0;
510         atomic_set(&t->count, 0);
511         t->func = func;
512         t->data = data;
513 }
514
515 EXPORT_SYMBOL(tasklet_init);
516
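/*
 * Illustrative sketch, not part of the original file: the common tasklet
 * life cycle using the helpers above.  Names are hypothetical;
 * DECLARE_TASKLET() could be used instead of the explicit tasklet_init().
 */
static void example_tasklet_fn(unsigned long data)
{
        /* Runs in softirq context on the scheduling CPU; must not sleep. */
}

static struct tasklet_struct example_tasklet;

static void __maybe_unused example_tasklet_usage(void)
{
        tasklet_init(&example_tasklet, example_tasklet_fn, 0);

        /* Typically done from the hard interrupt handler: */
        tasklet_schedule(&example_tasklet);

        /* On teardown, make sure it is neither queued nor running: */
        tasklet_kill(&example_tasklet);
}
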
517 void tasklet_kill(struct tasklet_struct *t)
518 {
519         if (in_interrupt())
520                 printk("Attempt to kill tasklet from interrupt\n");
521
522         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
523                 do {
524                         yield();
525                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
526         }
527         tasklet_unlock_wait(t);
528         clear_bit(TASKLET_STATE_SCHED, &t->state);
529 }
530
531 EXPORT_SYMBOL(tasklet_kill);
532
533 /*
534  * tasklet_hrtimer
535  */
536
537 /*
538  * The trampoline is called when the hrtimer expires. It schedules a tasklet
539  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
540  * hrtimer callback, but from softirq context.
541  */
542 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
543 {
544         struct tasklet_hrtimer *ttimer =
545                 container_of(timer, struct tasklet_hrtimer, timer);
546
547         tasklet_hi_schedule(&ttimer->tasklet);
548         return HRTIMER_NORESTART;
549 }
550
551 /*
552  * Helper function which calls the hrtimer callback from
553  * tasklet/softirq context
554  */
555 static void __tasklet_hrtimer_trampoline(unsigned long data)
556 {
557         struct tasklet_hrtimer *ttimer = (void *)data;
558         enum hrtimer_restart restart;
559
560         restart = ttimer->function(&ttimer->timer);
561         if (restart != HRTIMER_NORESTART)
562                 hrtimer_restart(&ttimer->timer);
563 }
564
565 /**
566  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
567  * @ttimer:      tasklet_hrtimer which is initialized
568  * @function:    hrtimer callback function which gets called from softirq context
569  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
570  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
571  */
572 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
573                           enum hrtimer_restart (*function)(struct hrtimer *),
574                           clockid_t which_clock, enum hrtimer_mode mode)
575 {
576         hrtimer_init(&ttimer->timer, which_clock, mode);
577         ttimer->timer.function = __hrtimer_tasklet_trampoline;
578         tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
579                      (unsigned long)ttimer);
580         ttimer->function = function;
581 }
582 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
583
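/*
 * Illustrative sketch, not part of the original file: using the combo above
 * so that the timer callback runs from HI_SOFTIRQ context rather than from
 * hard hrtimer (interrupt) context.  Names are hypothetical;
 * tasklet_hrtimer_start() and tasklet_hrtimer_cancel() are the companion
 * helpers from linux/interrupt.h.
 */
static struct tasklet_hrtimer example_thrt;

static enum hrtimer_restart example_thrt_fn(struct hrtimer *timer)
{
        /* Softirq context: return HRTIMER_RESTART here to re-arm. */
        return HRTIMER_NORESTART;
}

static void __maybe_unused example_thrt_usage(void)
{
        tasklet_hrtimer_init(&example_thrt, example_thrt_fn,
                             CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        tasklet_hrtimer_start(&example_thrt, ktime_set(1, 0),
                              HRTIMER_MODE_REL);
        /* ... and on teardown: */
        tasklet_hrtimer_cancel(&example_thrt);
}
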
584 /*
585  * Remote softirq bits
586  */
587
588 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
589 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
590
591 static void __local_trigger(struct call_single_data *cp, int softirq)
592 {
593         struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
594
595         list_add_tail(&cp->list, head);
596
597         /* Trigger the softirq only if the list was previously empty.  */
598         if (head->next == &cp->list)
599                 raise_softirq_irqoff(softirq);
600 }
601
602 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
603 static void remote_softirq_receive(void *data)
604 {
605         struct call_single_data *cp = data;
606         unsigned long flags;
607         int softirq;
608
609         softirq = cp->priv;
610
611         local_irq_save(flags);
612         __local_trigger(cp, softirq);
613         local_irq_restore(flags);
614 }
615
616 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
617 {
618         if (cpu_online(cpu)) {
619                 cp->func = remote_softirq_receive;
620                 cp->info = cp;
621                 cp->flags = 0;
622                 cp->priv = softirq;
623
624                 __smp_call_function_single(cpu, cp, 0);
625                 return 0;
626         }
627         return 1;
628 }
629 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
630 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
631 {
632         return 1;
633 }
634 #endif
635
636 /**
637  * __send_remote_softirq - try to schedule softirq work on a remote cpu
638  * @cp: private SMP call function data area
639  * @cpu: the remote cpu
640  * @this_cpu: the currently executing cpu
641  * @softirq: the softirq for the work
642  *
643  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
644  * done, the work is instead queued up on the local cpu.
645  *
646  * Interrupts must be disabled.
647  */
648 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
649 {
650         if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
651                 __local_trigger(cp, softirq);
652 }
653 EXPORT_SYMBOL(__send_remote_softirq);
654
655 /**
656  * send_remote_softirq - try to schedule softirq work on a remote cpu
657  * @cp: private SMP call function data area
658  * @cpu: the remote cpu
659  * @softirq: the softirq for the work
660  *
661  * Like __send_remote_softirq except that disabling interrupts and
662  * computing the current cpu is done for the caller.
663  */
664 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
665 {
666         unsigned long flags;
667         int this_cpu;
668
669         local_irq_save(flags);
670         this_cpu = smp_processor_id();
671         __send_remote_softirq(cp, cpu, this_cpu, softirq);
672         local_irq_restore(flags);
673 }
674 EXPORT_SYMBOL(send_remote_softirq);
675
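/*
 * Illustrative sketch, not part of the original file: the intended shape of
 * a remote-softirq user.  A request embeds a call_single_data; completion is
 * steered to a chosen CPU with send_remote_softirq(), and the action for
 * that softirq vector drains this CPU's softirq_work_list[] entry.  The
 * structure, function names and EXAMPLE_SOFTIRQ vector are hypothetical.
 */
struct example_request {
        struct call_single_data csd;    /* must stay valid until completion runs */
        int result;
};

static void __maybe_unused example_complete_on(struct example_request *rq, int cpu)
{
        /* Queue rq->csd on @cpu (or locally as a fallback) and raise the vector there. */
        send_remote_softirq(&rq->csd, cpu, EXAMPLE_SOFTIRQ);
}

/* Drain side, called from the EXAMPLE_SOFTIRQ action on the destination CPU. */
static void __maybe_unused example_drain_completions(void)
{
        struct list_head *head = &__get_cpu_var(softirq_work_list[EXAMPLE_SOFTIRQ]);

        local_irq_disable();
        while (!list_empty(head)) {
                struct call_single_data *cp =
                        list_entry(head->next, struct call_single_data, list);
                struct example_request *rq =
                        container_of(cp, struct example_request, csd);

                list_del(&cp->list);
                local_irq_enable();
                rq->result = 0;         /* ... finish @rq here, in softirq context ... */
                local_irq_disable();
        }
        local_irq_enable();
}
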
676 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
677                                                unsigned long action, void *hcpu)
678 {
679         /*
680          * If a CPU goes away, splice its entries to the current CPU
681          * and trigger a run of the softirq
682          */
683         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
684                 int cpu = (unsigned long) hcpu;
685                 int i;
686
687                 local_irq_disable();
688                 for (i = 0; i < NR_SOFTIRQS; i++) {
689                         struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
690                         struct list_head *local_head;
691
692                         if (list_empty(head))
693                                 continue;
694
695                         local_head = &__get_cpu_var(softirq_work_list[i]);
696                         list_splice_init(head, local_head);
697                         raise_softirq_irqoff(i);
698                 }
699                 local_irq_enable();
700         }
701
702         return NOTIFY_OK;
703 }
704
705 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
706         .notifier_call  = remote_softirq_cpu_notify,
707 };
708
709 void __init softirq_init(void)
710 {
711         int cpu;
712
713         for_each_possible_cpu(cpu) {
714                 int i;
715
716                 per_cpu(tasklet_vec, cpu).tail =
717                         &per_cpu(tasklet_vec, cpu).head;
718                 per_cpu(tasklet_hi_vec, cpu).tail =
719                         &per_cpu(tasklet_hi_vec, cpu).head;
720                 for (i = 0; i < NR_SOFTIRQS; i++)
721                         INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
722         }
723
724         register_hotcpu_notifier(&remote_softirq_cpu_notifier);
725
726         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
727         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
728 }
729
730 static int run_ksoftirqd(void * __bind_cpu)
731 {
732         set_current_state(TASK_INTERRUPTIBLE);
733
734         while (!kthread_should_stop()) {
735                 preempt_disable();
736                 if (!local_softirq_pending()) {
737                         preempt_enable_no_resched();
738                         schedule();
739                         preempt_disable();
740                 }
741
742                 __set_current_state(TASK_RUNNING);
743
744                 while (local_softirq_pending()) {
745                         /* Disabling preemption stops the CPU from going
746                            offline. If it is already offline, we have been
747                            moved to the wrong CPU: don't process. */
748                         if (cpu_is_offline((long)__bind_cpu))
749                                 goto wait_to_die;
750                         local_irq_disable();
751                         if (local_softirq_pending())
752                                 __do_softirq();
753                         local_irq_enable();
754                         preempt_enable_no_resched();
755                         cond_resched();
756                         preempt_disable();
757                         rcu_note_context_switch((long)__bind_cpu);
758                 }
759                 preempt_enable();
760                 set_current_state(TASK_INTERRUPTIBLE);
761         }
762         __set_current_state(TASK_RUNNING);
763         return 0;
764
765 wait_to_die:
766         preempt_enable();
767         /* Wait for kthread_stop */
768         set_current_state(TASK_INTERRUPTIBLE);
769         while (!kthread_should_stop()) {
770                 schedule();
771                 set_current_state(TASK_INTERRUPTIBLE);
772         }
773         __set_current_state(TASK_RUNNING);
774         return 0;
775 }
776
777 #ifdef CONFIG_HOTPLUG_CPU
778 /*
779  * tasklet_kill_immediate is called to remove a tasklet which can already be
780  * scheduled for execution on @cpu.
781  *
782  * Unlike tasklet_kill, this function removes the tasklet
783  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
784  *
785  * When this function is called, @cpu must be in the CPU_DEAD state.
786  */
787 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
788 {
789         struct tasklet_struct **i;
790
791         BUG_ON(cpu_online(cpu));
792         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
793
794         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
795                 return;
796
797         /* CPU is dead, so no lock needed. */
798         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
799                 if (*i == t) {
800                         *i = t->next;
801                         /* If this was the tail element, move the tail ptr */
802                         if (*i == NULL)
803                                 per_cpu(tasklet_vec, cpu).tail = i;
804                         return;
805                 }
806         }
807         BUG();
808 }
809
810 static void takeover_tasklets(unsigned int cpu)
811 {
812         /* CPU is dead, so no lock needed. */
813         local_irq_disable();
814
815         /* Find end, append list for that CPU. */
816         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
817                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
818                 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
819                 per_cpu(tasklet_vec, cpu).head = NULL;
820                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
821         }
822         raise_softirq_irqoff(TASKLET_SOFTIRQ);
823
824         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
825                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
826                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
827                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
828                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
829         }
830         raise_softirq_irqoff(HI_SOFTIRQ);
831
832         local_irq_enable();
833 }
834 #endif /* CONFIG_HOTPLUG_CPU */
835
836 static int __cpuinit cpu_callback(struct notifier_block *nfb,
837                                   unsigned long action,
838                                   void *hcpu)
839 {
840         int hotcpu = (unsigned long)hcpu;
841         struct task_struct *p;
842
843         switch (action) {
844         case CPU_UP_PREPARE:
845         case CPU_UP_PREPARE_FROZEN:
846                 p = kthread_create_on_node(run_ksoftirqd,
847                                            hcpu,
848                                            cpu_to_node(hotcpu),
849                                            "ksoftirqd/%d", hotcpu);
850                 if (IS_ERR(p)) {
851                         printk("ksoftirqd for %i failed\n", hotcpu);
852                         return notifier_from_errno(PTR_ERR(p));
853                 }
854                 kthread_bind(p, hotcpu);
855                 per_cpu(ksoftirqd, hotcpu) = p;
856                 break;
857         case CPU_ONLINE:
858         case CPU_ONLINE_FROZEN:
859                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
860                 break;
861 #ifdef CONFIG_HOTPLUG_CPU
862         case CPU_UP_CANCELED:
863         case CPU_UP_CANCELED_FROZEN:
864                 if (!per_cpu(ksoftirqd, hotcpu))
865                         break;
866                 /* Unbind so it can run.  Fall thru. */
867                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
868                              cpumask_any(cpu_online_mask));
869         case CPU_DEAD:
870         case CPU_DEAD_FROZEN: {
871                 static const struct sched_param param = {
872                         .sched_priority = MAX_RT_PRIO-1
873                 };
874
875                 p = per_cpu(ksoftirqd, hotcpu);
876                 per_cpu(ksoftirqd, hotcpu) = NULL;
877                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
878                 kthread_stop(p);
879                 takeover_tasklets(hotcpu);
880                 break;
881         }
882 #endif /* CONFIG_HOTPLUG_CPU */
883         }
884         return NOTIFY_OK;
885 }
886
887 static struct notifier_block __cpuinitdata cpu_nfb = {
888         .notifier_call = cpu_callback
889 };
890
891 static __init int spawn_ksoftirqd(void)
892 {
893         void *cpu = (void *)(long)smp_processor_id();
894         int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
895
896         BUG_ON(err != NOTIFY_OK);
897         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
898         register_cpu_notifier(&cpu_nfb);
899         return 0;
900 }
901 early_initcall(spawn_ksoftirqd);
902
903 /*
904  * [ These __weak aliases are kept in a separate compilation unit, so that
905  *   GCC does not inline them incorrectly. ]
906  */
907
908 int __init __weak early_irq_init(void)
909 {
910         return 0;
911 }
912
913 #ifdef CONFIG_GENERIC_HARDIRQS
914 int __init __weak arch_probe_nr_irqs(void)
915 {
916         return NR_IRQS_LEGACY;
917 }
918
919 int __init __weak arch_early_irq_init(void)
920 {
921         return 0;
922 }
923 #endif