x86: explicitly use edx in const delay function.
[firefly-linux-kernel-4.4.55.git] / arch/x86/lib/delay_32.c
index aad9d95469dc68dcb25b9fac93b743d44b7d9038..0b659a320b1ec225dcbc7cee1a3e08f9520dda8a 100644
@@ -3,6 +3,7 @@
  *
  *     Copyright (C) 1993 Linus Torvalds
  *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *     Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
  *
  *     The __delay function must _NOT_ be inlined as its execution time
  *     depends wildly on alignment on many x86 processors. The additional
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/timex.h>
 #include <linux/preempt.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 
 #include <asm/processor.h>
 #include <asm/delay.h>
 /* simple loop based delay: */
 static void delay_loop(unsigned long loops)
 {
-       int d0;
-
        __asm__ __volatile__(
-               "\tjmp 1f\n"
-               ".align 16\n"
-               "1:\tjmp 2f\n"
-               ".align 16\n"
-               "2:\tdecl %0\n\tjns 2b"
-               :"=&a" (d0)
-               :"0" (loops));
+               "       test %0,%0      \n"
+               "       jz 3f           \n"
+               "       jmp 1f          \n"
+
+               ".align 16              \n"
+               "1:     jmp 2f          \n"
+
+               ".align 16              \n"
+               "2:     dec %0          \n"
+               "       jnz 2b          \n"
+               "3:     dec %0          \n"
+
+               : /* we don't need output */
+               :"a" (loops)
+       );
 }
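The rewritten sequence guards against a zero count up front (dec/jnz would otherwise wrap and spin for roughly 2^32 iterations), keeps both jump targets on 16-byte boundaries, and then simply counts down. Ignoring the alignment padding, which is the reason this stays in assembly, the logic is roughly the following C sketch (delay_loop_c is a made-up name, not part of the file):

static void delay_loop_c(unsigned long loops)
{
        if (loops == 0)                 /* "test %0,%0 ; jz 3f" */
                return;
        do {
                loops--;                /* "2: dec %0"          */
        } while (loops != 0);           /* "   jnz 2b"          */
        /* the trailing "3: dec %0" only leaves the discarded register at -1 */
}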
 
 /* TSC based delay: */
 static void delay_tsc(unsigned long loops)
 {
        unsigned long bclock, now;
+       int cpu;
 
-       preempt_disable();              /* TSC's are per-cpu */
+       preempt_disable();
+       cpu = smp_processor_id();
        rdtscl(bclock);
-       do {
-               rep_nop();
+       for (;;) {
                rdtscl(now);
-       } while ((now-bclock) < loops);
+               if ((now - bclock) >= loops)
+                       break;
+
+               /* Allow RT tasks to run */
+               preempt_enable();
+               rep_nop();
+               preempt_disable();
+
+               /*
+                * It is possible that we moved to another CPU, and
+                * since TSCs are per-CPU we need to account for
+                * that. The delay must guarantee that we wait "at
+                * least" the requested amount of time. Being moved
+                * to another CPU could make the wait longer, but we
+                * just need to make sure we waited long enough.
+                * Rebalance the counter for this CPU.
+                */
+               if (unlikely(cpu != smp_processor_id())) {
+                       loops -= (now - bclock);
+                       cpu = smp_processor_id();
+                       rdtscl(bclock);
+               }
+       }
        preempt_enable();
 }
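The rebalance-on-migration idea is easy to illustrate outside the kernel. The sketch below only mirrors the pattern of delay_tsc() above, assuming GCC-style inline asm and glibc's sched_getcpu(); tsc_lo() and tsc_delay_sketch() are made-up names, and the preempt_enable()/rep_nop()/preempt_disable() window has no user-space counterpart here, so it is omitted.

#define _GNU_SOURCE
#include <sched.h>

static inline unsigned int tsc_lo(void)         /* low 32 TSC bits, like rdtscl() */
{
        unsigned int lo, hi;

        __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
        return lo;
}

static void tsc_delay_sketch(unsigned int loops)
{
        unsigned int start = tsc_lo(), now;
        int cpu = sched_getcpu();

        while ((now = tsc_lo()) - start < loops) {
                if (cpu != sched_getcpu()) {
                        /* Migrated: credit the cycles already waited and
                         * restart the measurement against the new CPU's TSC,
                         * so the total wait is still "at least" loops. */
                        loops -= now - start;
                        cpu = sched_getcpu();
                        start = tsc_lo();
                }
        }
}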
 
@@ -63,10 +95,10 @@ void use_tsc_delay(void)
        delay_fn = delay_tsc;
 }
 
-int read_current_timer(unsigned long *timer_val)
+int __devinit read_current_timer(unsigned long *timer_val)
 {
        if (delay_fn == delay_tsc) {
-               rdtscl(*timer_val);
+               rdtscll(*timer_val);
                return 0;
        }
        return -1;
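rdtscl() keeps only the low 32 bits of the time-stamp counter, which wrap after a few seconds at GHz clock rates; rdtscll() returns the full 64-bit value, which is what a current-timer readout needs. In terms of the raw instruction, the difference is roughly this (read_tsc64 is a made-up helper, not the kernel macro):

static inline unsigned long long read_tsc64(void)
{
        unsigned int lo, hi;

        /* RDTSC returns the 64-bit counter in EDX:EAX; rdtscl() would
         * keep only "lo", rdtscll() combines both halves. */
        __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
        return ((unsigned long long)hi << 32) | lo;
}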
@@ -82,7 +114,7 @@ inline void __const_udelay(unsigned long xloops)
        int d0;
 
        xloops *= 4;
-       __asm__("mull %0"
+       __asm__("mull %%edx"
                :"=d" (xloops), "=&a" (d0)
                :"1" (xloops), "0"
                (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
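The asm here is fixed-point arithmetic: EAX holds xloops (already scaled by 4) and EDX holds loops_per_jiffy * (HZ/4), so mull leaves the 64-bit product xloops * loops_per_jiffy * HZ in EDX:EAX, and the "=d" output keeps only the high half, i.e. the product shifted right by 32 bits. Writing the operand as %%edx only makes that register choice explicit; the arithmetic is unchanged. A rough C equivalent (const_udelay_loops is a made-up name, lpj stands in for the per-CPU loops_per_jiffy):

#include <stdint.h>

static uint32_t const_udelay_loops(uint32_t xloops, uint32_t lpj, uint32_t hz)
{
        /* same split as the asm: *4 on one side, /4 on the other,
         * so neither 32-bit factor overflows prematurely */
        uint64_t product = (uint64_t)(xloops * 4) * (lpj * (hz / 4));

        return (uint32_t)(product >> 32);       /* iterations for delay_fn() */
}

Callers such as __udelay() pass xloops as microseconds scaled by 2^32 / 10^6, so the high half of the product comes out directly in delay-loop iterations for the requested time.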