x86: explicitly use edx in const delay function.
[firefly-linux-kernel-4.4.55.git] arch/x86/lib/delay_32.c
index d710f2d167bb49505e56ed4c0e9c5cc91a52546e..0b659a320b1ec225dcbc7cee1a3e08f9520dda8a 100644
@@ -3,6 +3,7 @@
  *
  *     Copyright (C) 1993 Linus Torvalds
  *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *     Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
  *
  *     The __delay function must _NOT_ be inlined as its execution time
  *     depends wildly on alignment on many x86 processors. The additional
 /* simple loop based delay: */
 static void delay_loop(unsigned long loops)
 {
-       int d0;
-
        __asm__ __volatile__(
-               "\tjmp 1f\n"
-               ".align 16\n"
-               "1:\tjmp 2f\n"
-               ".align 16\n"
-               "2:\tdecl %0\n\tjns 2b"
-               :"=&a" (d0)
-               :"0" (loops));
+               "       test %0,%0      \n"
+               "       jz 3f           \n"
+               "       jmp 1f          \n"
+
+               ".align 16              \n"
+               "1:     jmp 2f          \n"
+
+               ".align 16              \n"
+               "2:     dec %0          \n"
+               "       jnz 2b          \n"
+               "3:     dec %0          \n"
+
+               : /* we don't need output */
+               :"a" (loops)
+       );
 }
 
 /* TSC based delay: */
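
The rewritten delay_loop() above guards the zero case up front ("test %0,%0 / jz 3f")
and counts down with "dec / jnz", so the aligned loop body executes exactly `loops`
times, whereas the removed "decl / jns" form ran loops+1 iterations. A minimal C model
of the new iteration count (illustration only, not kernel code; the real asm also keeps
the jump targets 16-byte aligned):

/* Hypothetical helper: models only the iteration count of the new delay_loop(). */
static void delay_loop_model(unsigned long loops)
{
	if (loops == 0)		/* "test %0,%0 / jz 3f" fast exit */
		return;
	do {
		/* one pass of the aligned "2: dec %0 / jnz 2b" loop */
	} while (--loops);
}
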
@@ -91,7 +98,7 @@ void use_tsc_delay(void)
 int __devinit read_current_timer(unsigned long *timer_val)
 {
        if (delay_fn == delay_tsc) {
-               rdtscl(*timer_val);
+               rdtscll(*timer_val);
                return 0;
        }
        return -1;
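
For reference, rdtscl() stores only the low 32 bits of the time-stamp counter, while
rdtscll() stores the whole 64-bit value. Roughly what the two macros did on 32-bit x86
of this era (a sketch with hypothetical my_* names; the real definitions live in
asm/msr.h and vary by kernel version):

/* Sketch only: approximate expansions of the historical 32-bit RDTSC macros. */
static inline void my_rdtscl(unsigned int *low)
{
	unsigned int lo, hi;
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	*low = lo;					/* low 32 bits only */
}

static inline void my_rdtscll(unsigned long long *val)
{
	unsigned int lo, hi;
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	*val = ((unsigned long long)hi << 32) | lo;	/* full 64-bit count */
}
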
@@ -107,7 +114,7 @@ inline void __const_udelay(unsigned long xloops)
        int d0;
 
        xloops *= 4;
-       __asm__("mull %0"
+       __asm__("mull %%edx"
                :"=d" (xloops), "=&a" (d0)
                :"1" (xloops), "0"
                (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
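
With the operands laid out above, eax holds xloops and edx holds
loops_per_jiffy * (HZ/4); "mull %%edx" forms the 64-bit product in edx:eax, and the
"=d" output hands the high 32 bits back as the loop count. Naming edx explicitly only
makes the multiplier register visible where the old "%0" relied on the "=d" constraint
to pick the same register. A plain C rendering of that fixed-point scaling (a sketch
for illustration; the HZ value and the helper name are assumptions):

#include <stdint.h>

#define HZ 250	/* assumption: a typical config value; real code uses the kernel's HZ */

/* Hypothetical helper: C equivalent of the "mull %%edx" scaling in __const_udelay(). */
static uint32_t const_udelay_loops(uint32_t xloops, uint32_t lpj)
{
	uint64_t prod;

	xloops *= 4;					/* as in the code above */
	prod = (uint64_t)xloops * (lpj * (HZ / 4u));	/* eax * edx -> edx:eax */
	return (uint32_t)(prod >> 32);			/* the "=d" (high-half) result */
}

For context (from the same file in kernels of this era, not shown in this hunk),
__udelay(n) reaches this code with xloops scaled by roughly 2^32 / 1000000, so the
high half of the product works out to the number of delay iterations for n microseconds.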