From 4d803fcdcd97dd346d4b39c3b76e5879cead8a31 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 8 Sep 2005 14:37:53 -0700
Subject: [PATCH] [SPARC64]: Inline membar()'s again.

Since GCC has to emit a call and a delay slot to reach the
out-of-line "membar" routines in arch/sparc64/lib/mb.S,
it is much better to just do the necessary predicted
branch inline, as:

	ba,pt	%xcc, 1f
	 membar	#whatever
1:

instead of the current:

	call	membar_foo
	 dslot

because this way GCC is not required to allocate a stack
frame if the function can be a leaf function.

This also makes this bug fix easier to backport to 2.4.x.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc64/kernel/sparc64_ksyms.c |  9 ----
 arch/sparc64/lib/Makefile           |  2 +-
 arch/sparc64/lib/mb.S               | 73 -----------------------------
 include/asm-sparc64/system.h        | 49 ++++++++++++++++---
 4 files changed, 43 insertions(+), 90 deletions(-)
 delete mode 100644 arch/sparc64/lib/mb.S

diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d89fc24808d3..7d9a0f6c437d 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -403,12 +403,3 @@ EXPORT_SYMBOL(xor_vis_4);
 EXPORT_SYMBOL(xor_vis_5);
 
 EXPORT_SYMBOL(prom_palette);
-
-/* memory barriers */
-EXPORT_SYMBOL(mb);
-EXPORT_SYMBOL(rmb);
-EXPORT_SYMBOL(wmb);
-EXPORT_SYMBOL(membar_storeload);
-EXPORT_SYMBOL(membar_storeload_storestore);
-EXPORT_SYMBOL(membar_storeload_loadload);
-EXPORT_SYMBOL(membar_storestore_loadstore);
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 6201f1040982..40dbeec7e5d6 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
 	 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
 	 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
 	 copy_in_user.o user_fixup.o memmove.o \
-	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
+	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
 
 lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S
deleted file mode 100644
index 4004f748619f..000000000000
--- a/arch/sparc64/lib/mb.S
+++ /dev/null
@@ -1,73 +0,0 @@
-/* mb.S: Out of line memory barriers.
- *
- * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
- */
-
-	/* These are here in an effort to more fully work around
-	 * Spitfire Errata #51.  Essentially, if a memory barrier
-	 * occurs soon after a mispredicted branch, the chip can stop
-	 * executing instructions until a trap occurs.  Therefore, if
-	 * interrupts are disabled, the chip can hang forever.
-	 *
-	 * It used to be believed that the memory barrier had to be
-	 * right in the delay slot, but a case has been traced
-	 * recently wherein the memory barrier was one instruction
-	 * after the branch delay slot and the chip still hung.  The
-	 * offending sequence was the following in sym_wakeup_done()
-	 * of the sym53c8xx_2 driver:
-	 *
-	 *	call	sym_ccb_from_dsa, 0
-	 *	 movge	%icc, 0, %l0
-	 *	brz,pn	%o0, .LL1303
-	 *	 mov	%o0, %l2
-	 *	membar	#LoadLoad
-	 *
-	 * The branch has to be mispredicted for the bug to occur.
-	 * Therefore, we put the memory barrier explicitly into a
-	 * "branch always, predicted taken" delay slot to avoid the
-	 * problem case.
-	 */
-
-	.text
-
-99:	retl
-	 nop
-
-	.globl	mb
-mb:	ba,pt	%xcc, 99b
-	 membar	#LoadLoad | #LoadStore | #StoreStore | #StoreLoad
-	.size	mb, .-mb
-
-	.globl	rmb
-rmb:	ba,pt	%xcc, 99b
-	 membar	#LoadLoad
-	.size	rmb, .-rmb
-
-	.globl	wmb
-wmb:	ba,pt	%xcc, 99b
-	 membar	#StoreStore
-	.size	wmb, .-wmb
-
-	.globl	membar_storeload
-membar_storeload:
-	ba,pt	%xcc, 99b
-	 membar	#StoreLoad
-	.size	membar_storeload, .-membar_storeload
-
-	.globl	membar_storeload_storestore
-membar_storeload_storestore:
-	ba,pt	%xcc, 99b
-	 membar	#StoreLoad | #StoreStore
-	.size	membar_storeload_storestore, .-membar_storeload_storestore
-
-	.globl	membar_storeload_loadload
-membar_storeload_loadload:
-	ba,pt	%xcc, 99b
-	 membar	#StoreLoad | #LoadLoad
-	.size	membar_storeload_loadload, .-membar_storeload_loadload
-
-	.globl	membar_storestore_loadstore
-membar_storestore_loadstore:
-	ba,pt	%xcc, 99b
-	 membar	#StoreStore | #LoadStore
-	.size	membar_storestore_loadstore, .-membar_storestore_loadstore
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index 5e94c05dc2fc..b5417529f6f1 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,13 +28,48 @@ enum sparc_cpu {
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
-extern void mb(void);
-extern void rmb(void);
-extern void wmb(void);
-extern void membar_storeload(void);
-extern void membar_storeload_storestore(void);
-extern void membar_storeload_loadload(void);
-extern void membar_storestore_loadstore(void);
+/* Memory barriers are emitted inline through membar_safe() in an effort to
+ * more fully work around Spitfire Errata #51.  Essentially, if a memory
+ * barrier occurs soon after a mispredicted branch, the chip can stop
+ * executing instructions until a trap occurs.  Therefore, if interrupts
+ * are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the membar had to be right in the delay slot,
+ * but a case was traced wherein it was one instruction after the branch
+ * delay slot and the chip still hung.  The offending sequence, seen in
+ * sym_wakeup_done() of the sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur, so membar_safe()
+ * puts the membar in the delay slot of a "branch always, predicted taken"
+ * to the very next instruction.  'type' is a string literal membar mask.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+#define mb()	\
+	membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#define rmb()	\
+	membar_safe("#LoadLoad")
+#define wmb()	\
+	membar_safe("#StoreStore")
+#define membar_storeload() \
+	membar_safe("#StoreLoad")
+#define membar_storeload_storestore() \
+	membar_safe("#StoreLoad | #StoreStore")
+#define membar_storeload_loadload() \
+	membar_safe("#StoreLoad | #LoadLoad")
+#define membar_storestore_loadstore() \
+	membar_safe("#StoreStore | #LoadStore")
 
 #endif
 
-- 
2.34.1