powerpc/32: cacheable_memcpy becomes memcpy
author LEROY Christophe <christophe.leroy@c-s.fr>
Tue, 19 May 2015 10:07:55 +0000 (12:07 +0200)
committer Scott Wood <scottwood@freescale.com>
Sat, 8 Aug 2015 03:59:27 +0000 (22:59 -0500)
cacheable_memcpy uses the dcbz instruction and is more efficient than
memcpy when the destination is in RAM. If the destination is in an
I/O area, memcpy_toio() is normally used, not memcpy().

This patch renames memcpy as generic_memcpy, and renames
cacheable_memcpy as memcpy

On MPC885, we get approximately a 7% increase in the transfer rate
on FTP reception

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
arch/powerpc/lib/copy_32.S

index 9262071a97077830de95feee6553bfc8de008f3b..1d49c740407ab5762bb4d749db80bf55c99c09e8 100644 (file)
@@ -129,13 +129,18 @@ _GLOBAL(memset)
  * We only use this version if the source and dest don't overlap.
  * -- paulus.
  */
-_GLOBAL(cacheable_memcpy)
+_GLOBAL(memmove)
+       cmplw   0,r3,r4
+       bgt     backwards_memcpy
+       /* fall through */
+
+_GLOBAL(memcpy)
        add     r7,r3,r5                /* test if the src & dst overlap */
        add     r8,r4,r5
        cmplw   0,r4,r7
        cmplw   1,r3,r8
        crand   0,0,4                   /* cr0.lt &= cr1.lt */
-       blt     memcpy                  /* if regions overlap */
+       blt     generic_memcpy          /* if regions overlap */
 
        addi    r4,r4,-4
        addi    r6,r3,-4
@@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy)
        bdnz    40b
 65:    blr
 
-_GLOBAL(memmove)
-       cmplw   0,r3,r4
-       bgt     backwards_memcpy
-       /* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
        srwi.   r7,r5,3
        addi    r6,r3,-4
        addi    r4,r4,-4