ARM: move heavy barrier support out of line
author: Russell King <rmk+kernel@arm.linux.org.uk>
Mon, 1 Jun 2015 22:44:46 +0000 (23:44 +0100)
committer: Russell King <rmk+kernel@arm.linux.org.uk>
Sat, 25 Jul 2015 14:28:05 +0000 (15:28 +0100)
The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site.  For example, in
gpio_set_irq_type(), we have this for mb():

c0344c08:       f57ff04e        dsb     st
c0344c0c:       e59f8190        ldr     r8, [pc, #400]  ; c0344da4 <gpio_set_irq_type+0x230>
c0344c10:       e3590004        cmp     r9, #4
c0344c14:       e5983014        ldr     r3, [r8, #20]
c0344c18:       0a000054        beq     c0344d70 <gpio_set_irq_type+0x1fc>
c0344c1c:       e3530000        cmp     r3, #0
c0344c20:       0a000004        beq     c0344c38 <gpio_set_irq_type+0xc4>
c0344c24:       e50b2030        str     r2, [fp, #-48]  ; 0xffffffd0
c0344c28:       e50bc034        str     ip, [fp, #-52]  ; 0xffffffcc
c0344c2c:       e12fff33        blx     r3
c0344c30:       e51bc034        ldr     ip, [fp, #-52]  ; 0xffffffcc
c0344c34:       e51b2030        ldr     r2, [fp, #-48]  ; 0xffffffd0
c0344c38:       e5963004        ldr     r3, [r6, #4]

Moving the outer_sync() call out of line reduces the impact of
the barrier:

c0344968:       f57ff04e        dsb     st
c034496c:       e35a0004        cmp     sl, #4
c0344970:       e50b2030        str     r2, [fp, #-48]  ; 0xffffffd0
c0344974:       0a000044        beq     c0344a8c <gpio_set_irq_type+0x1b8>
c0344978:       ebf363dd        bl      c001d8f4 <arm_heavy_mb>
c034497c:       e5953004        ldr     r3, [r5, #4]

This should reduce the cache footprint of this code.  Overall, this
results in a reduction of around 20K in the kernel size:

    text    data      bss      dec     hex filename
10773970  667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old
10754219  667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new

Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP).  Here, the bus interconnects
need additional handling to ensure that writes become visible in the
correct order (e.g. between dma_map() operations, writes to DMA
coherent memory, and MMIO accesses).

Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
arch/arm/include/asm/barrier.h
arch/arm/include/asm/outercache.h
arch/arm/kernel/irq.c
arch/arm/mach-mmp/pm-pxa910.c
arch/arm/mach-prima2/pm.c
arch/arm/mach-ux500/cache-l2x0.c
arch/arm/mm/Kconfig
arch/arm/mm/flush.c

index 6c2327e1c7323d79831af30bb296c55a9409e9bd..fea99b0e20878f6170e239aa31c7b3a692d3f5a5 100644 (file)
@@ -2,7 +2,6 @@
 #define __ASM_BARRIER_H
 
 #ifndef __ASSEMBLY__
-#include <asm/outercache.h>
 
 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
 
 #define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
+#ifdef CONFIG_ARM_HEAVY_MB
+extern void arm_heavy_mb(void);
+#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0)
+#else
+#define __arm_heavy_mb(x...) dsb(x)
+#endif
+
 #ifdef CONFIG_ARCH_HAS_BARRIERS
 #include <mach/barriers.h>
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
-#define mb()           do { dsb(); outer_sync(); } while (0)
+#define mb()           __arm_heavy_mb()
 #define rmb()          dsb()
-#define wmb()          do { dsb(st); outer_sync(); } while (0)
+#define wmb()          __arm_heavy_mb(st)
 #define dma_rmb()      dmb(osh)
 #define dma_wmb()      dmb(oshst)
 #else
index 563b92fc2f41c3b2dd10bd56c1dd0f396cc8474a..c2bf24f40177ddce6ce472d75735a04b26e426fd 100644 (file)
@@ -129,21 +129,4 @@ static inline void outer_resume(void) { }
 
 #endif
 
-#ifdef CONFIG_OUTER_CACHE_SYNC
-/**
- * outer_sync - perform a sync point for outer cache
- *
- * Ensure that all outer cache operations are complete and any store
- * buffers are drained.
- */
-static inline void outer_sync(void)
-{
-       if (outer_cache.sync)
-               outer_cache.sync();
-}
-#else
-static inline void outer_sync(void)
-{ }
-#endif
-
 #endif /* __ASM_OUTERCACHE_H */
index 350f188c92d29447b59cbdcf62e0f98be5eed484..b96c8ed1723abf29ceb73f1771b67bf560d925ab 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/outercache.h>
 #include <asm/exception.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
index 04c9daf9f8d767a7225631a7f36e443fae260d88..7db5870d127fc0f2fda4f06c75055b6c269cf344 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <asm/mach-types.h>
+#include <asm/outercache.h>
 #include <mach/hardware.h>
 #include <mach/cputype.h>
 #include <mach/addr-map.h>
index d99d08eeb9664cff404e50bf03a2379e08d376b7..83e94c95e314414a6d85e145c9e204f186f9cc75 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/of_platform.h>
 #include <linux/io.h>
 #include <linux/rtc/sirfsoc_rtciobrg.h>
+#include <asm/outercache.h>
 #include <asm/suspend.h>
 #include <asm/hardware/cache-l2x0.h>
 
index 7557bede7ae67700c6cc65e593e94210bc93408d..780bd13cd7e3d936834c10a04ef5a44ff4a0e5bb 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 
+#include <asm/outercache.h>
 #include <asm/hardware/cache-l2x0.h>
 
 #include "db8500-regs.h"
index 7c6b976ab8d39d0f5005b9f8c815a752d1ab0f8b..df7537f12469a15669b89aba42bd96445e789305 100644 (file)
@@ -883,6 +883,7 @@ config OUTER_CACHE
 
 config OUTER_CACHE_SYNC
        bool
+       select ARM_HEAVY_MB
        help
          The outer cache has a outer_cache_fns.sync function pointer
          that can be used to drain the write buffer of the outer cache.
@@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS
          This option allows the use of custom mandatory barriers
          included via the mach/barriers.h file.
 
+config ARM_HEAVY_MB
+       bool
+
 config ARCH_SUPPORTS_BIG_ENDIAN
        bool
        help
index 34b66af516ea9ea83c8afb9ac7439467d80f9a3a..ce6c2960d5ace171a56149988deb38b1139dabdc 100644 (file)
 
 #include "mm.h"
 
+#ifdef CONFIG_ARM_HEAVY_MB
+void arm_heavy_mb(void)
+{
+#ifdef CONFIG_OUTER_CACHE_SYNC
+       if (outer_cache.sync)
+               outer_cache.sync();
+#endif
+}
+EXPORT_SYMBOL(arm_heavy_mb);
+#endif
+
 #ifdef CONFIG_CPU_CACHE_VIPT
 
 static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)