tick: Introduce hrtimer based broadcast
/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licenced under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (tick_broadcast_device.evtdev &&
             tick_broadcast_device.evtdev->rating >= dev->rating) ||
            (dev->features & CLOCK_EVT_FEAT_C3STOP))
                return;

        clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

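/*
 * Install the broadcast function for a device which depends on the
 * broadcast mechanism. If the architecture provides no broadcast
 * function (tick_broadcast may be a NULL stub in that case), fall
 * back to err_broadcast(), which merely warns once.
 */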
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        unsigned long flags;
        int ret = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
                ret = 1;
        } else {
                /*
                 * When the new device is not affected by the stop
                 * feature and the cpu is marked in the broadcast mask
                 * then clear the broadcast bit.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
                        int cpu = smp_processor_id();
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                        tick_broadcast_clear_oneshot(cpu);
                } else {
                        tick_device_setup_broadcast_func(dev);
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}
#endif

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static void tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;

        /*
         * Check, if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                cpumask_clear_cpu(cpu, mask);
                td = &per_cpu(tick_cpu_device, cpu);
                td->evtdev->event_handler(td->evtdev);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic)
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static void tick_do_periodic_broadcast(void)
{
        raw_spin_lock(&tick_broadcast_lock);

        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        tick_do_broadcast(tmpmask);

        raw_spin_unlock(&tick_broadcast_lock);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        ktime_t next;

        tick_do_periodic_broadcast();

        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
        if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
                return;

        /*
         * Set up the next period for devices which do not have
         * periodic mode. We read dev->next_event first and add to it
         * when the event already expired. clockevents_program_event()
         * sets dev->next_event only when the event is really
         * programmed to the device.
         */
        for (next = dev->next_event; ;) {
                next = ktime_add(next, tick_period);

                if (!clockevents_program_event(dev, next, false))
                        return;
                tick_do_periodic_broadcast();
        }
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 */
static void tick_do_broadcast_on_off(unsigned long *reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        int cpu, bc_stopped;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;
        bc = tick_broadcast_device.evtdev;

        /*
         * Is the device not affected by the powerstate ?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (*reason) {
        case CLOCK_EVT_NOTIFY_BROADCAST_ON:
        case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
                        tick_broadcast_force = 1;
                break;
        case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
                if (!tick_broadcast_force &&
                    cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (cpumask_empty(tick_broadcast_mask)) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
        if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
                printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
                       "offline CPU #%d\n", *oncpu);
        else
                tick_do_broadcast_on_off(&reason);
}

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
        struct clock_event_device *bc;
        unsigned long flags;
        unsigned int cpu = *cpup;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_broadcast_mask);

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

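/*
 * Shut down the broadcast device on suspend. It is started again from
 * tick_resume_broadcast().
 */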
void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

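/*
 * Restart the broadcast device on resume. Returns non-zero when the
 * cpu local device of the calling CPU is serviced by the broadcast
 * device, so the caller can skip restarting the local device.
 */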
int tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;
        int broadcast = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        broadcast = cpumask_test_cpu(smp_processor_id(),
                                                     tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                broadcast = tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);

        return broadcast;
}


#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from another core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                    ktime_t expires, int force)
{
        int ret;

        if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
                clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
                tick_broadcast_set_affinity(bc, cpumask_of(cpu));
        return ret;
}

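/*
 * Switch the broadcast device back to oneshot mode on resume.
 */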
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
        return 0;
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast(int cpu)
{
        if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
                struct tick_device *td = &per_cpu(tick_cpu_device, cpu);

                clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;

        raw_spin_lock(&tick_broadcast_lock);
again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
                        next_event.tv64 = td->evtdev->next_event.tv64;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast() !
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Wakeup the cpus which have an expired event.
         */
        tick_do_broadcast(tmpmask);

        /*
         * Two reasons for reprogramming:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event.tv64 != KTIME_MAX) {
                /*
                 * Rearm the broadcast device. If event expired,
                 * repeat the above
                 */
                if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
                        goto again;
        }
        raw_spin_unlock(&tick_broadcast_lock);
}

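/*
 * With a hrtimer based broadcast device the "broadcast hardware" is a
 * hrtimer bound to one CPU, which therefore must not enter deep idle
 * while a broadcast event is programmed. Returns -EBUSY when @cpu is
 * the CPU the broadcast hrtimer is bound to and an event is pending,
 * 0 otherwise.
 */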
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event.tv64 == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shut down the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
        clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}

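/*
 * If the dying CPU was the one the broadcast hrtimer is bound to,
 * reprogram the broadcast event from the current CPU to move the
 * assignment here.
 */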
static void broadcast_move_bc(int deadcpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        if (!bc || !broadcast_needs_cpu(bc, deadcpu))
                return;
        /* This moves the broadcast assignment to this cpu */
        clockevents_program_event(bc, bc->next_event, 1);
}

/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 */
int tick_broadcast_oneshot_control(unsigned long reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        ktime_t now;
        int cpu, ret = 0;

        /*
         * Periodic mode does not care about the enter/exit of power
         * states
         */
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                return 0;

        /*
         * We are called with preemption disabled from the depth of the
         * idle code, so we can't be moved away.
         */
        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;

        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        bc = tick_broadcast_device.evtdev;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourselves in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away.
                         */
                        if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
                            dev->next_event.tv64 < bc->next_event.tv64)
                                tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
                }
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we remove the
                 * CPU from the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shut down.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                       tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event.tv64 == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event.tv64 <= now.tv64) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
}

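/*
 * Preset the expiry time of the cpu local devices for all CPUs in
 * @mask. Used when CPUs waiting for the periodic broadcast are
 * switched over to oneshot mode.
 */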
static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        int cpu = smp_processor_id();

        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;

                bc->event_handler = tick_handle_oneshot_broadcast;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(tmpmask, tick_broadcast_mask);
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask,
                           tick_broadcast_oneshot_mask, tmpmask);

                if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
                /*
                 * The first cpu which switches to oneshot mode sets
                 * the bit for all other cpus which are in the general
                 * (periodic) broadcast mask. So the bit is set and
                 * would prevent the first broadcast enter after this
                 * from programming the bc device.
                 */
                tick_broadcast_clear_oneshot(cpu);
        }
}

780
781 /*
782  * Select oneshot operating mode for the broadcast device
783  */
784 void tick_broadcast_switch_to_oneshot(void)
785 {
786         struct clock_event_device *bc;
787         unsigned long flags;
788
789         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
790
791         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
792         bc = tick_broadcast_device.evtdev;
793         if (bc)
794                 tick_broadcast_setup_oneshot(bc);
795
796         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
797 }
798
799
800 /*
801  * Remove a dead CPU from broadcasting
802  */
803 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
804 {
805         unsigned long flags;
806         unsigned int cpu = *cpup;
807
808         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
809
810         /*
811          * Clear the broadcast mask flag for the dead cpu, but do not
812          * stop the broadcast device!
813          */
814         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
815
816         broadcast_move_bc(cpu);
817
818         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
819 }
820
/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#endif

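/*
 * Allocate the cpumasks used by the broadcast code. Called early in
 * the boot process.
 */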
void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}