powerpc/32: A few optimisations in memcpy
author: LEROY Christophe <christophe.leroy@c-s.fr>
Tue, 19 May 2015 10:07:57 +0000 (12:07 +0200)
committer: Scott Wood <scottwood@freescale.com>
Sat, 8 Aug 2015 03:59:29 +0000 (22:59 -0500)
This patch adds a few optimisations in memcpy functions by using
lbzu/stbu instead of lbz/stb and by re-ordering instructions inside a
loop, so that the store no longer immediately follows the load it
depends on, to reduce latency due to loading.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
arch/powerpc/lib/copy_32.S

index 1d49c740407ab5762bb4d749db80bf55c99c09e8..2ef50c6294709674ad69167b14ac8a5bede4631e 100644 (file)
@@ -155,9 +155,9 @@ _GLOBAL(memcpy)
        mtctr   r8
        beq+    61f
 70:    lbz     r9,4(r4)                /* do some bytes */
-       stb     r9,4(r6)
        addi    r4,r4,1
        addi    r6,r6,1
+       stb     r9,3(r6)
        bdnz    70b
 61:    srwi.   r0,r0,2
        mtctr   r0
@@ -199,10 +199,10 @@ _GLOBAL(memcpy)
 64:    andi.   r0,r5,3
        mtctr   r0
        beq+    65f
-40:    lbz     r0,4(r4)
-       stb     r0,4(r6)
-       addi    r4,r4,1
-       addi    r6,r6,1
+       addi    r4,r4,3
+       addi    r6,r6,3
+40:    lbzu    r0,1(r4)
+       stbu    r0,1(r6)
        bdnz    40b
 65:    blr