percpucounter: Optimize __percpu_counter_add a bit through the use of this_cpu()...
author Christoph Lameter <cl@linux.com>
Mon, 6 Dec 2010 17:16:19 +0000 (11:16 -0600)
committer Tejun Heo <tj@kernel.org>
Fri, 17 Dec 2010 14:07:18 +0000 (15:07 +0100)
commit 819a72af8d6653daa48334f24ce0a935ccdd33c7
tree d834c9c7c66821d08ec965aab04d0c9ded263085
parent e72df0b847adf064e64bcbd5141f0031524e723e
percpucounter: Optimize __percpu_counter_add a bit through the use of this_cpu() operations.

The this_cpu_* operations can be used to optimize __percpu_counter_add a bit. This
avoids some address arithmetic and saves 12 bytes of code.

Before:

00000000000001d3 <__percpu_counter_add>:
 1d3: 55                    push   %rbp
 1d4: 48 89 e5              mov    %rsp,%rbp
 1d7: 41 55                 push   %r13
 1d9: 41 54                 push   %r12
 1db: 53                    push   %rbx
 1dc: 48 89 fb              mov    %rdi,%rbx
 1df: 48 83 ec 08           sub    $0x8,%rsp
 1e3: 4c 8b 67 30           mov    0x30(%rdi),%r12
 1e7: 65 4c 03 24 25 00 00  add    %gs:0x0,%r12
 1ee: 00 00
 1f0: 4d 63 2c 24           movslq (%r12),%r13
 1f4: 48 63 c2              movslq %edx,%rax
 1f7: 49 01 f5              add    %rsi,%r13
 1fa: 49 39 c5              cmp    %rax,%r13
 1fd: 7d 0a                 jge    209 <__percpu_counter_add+0x36>
 1ff: f7 da                 neg    %edx
 201: 48 63 d2              movslq %edx,%rdx
 204: 49 39 d5              cmp    %rdx,%r13
 207: 7f 1e                 jg     227 <__percpu_counter_add+0x54>
 209: 48 89 df              mov    %rbx,%rdi
 20c: e8 00 00 00 00        callq  211 <__percpu_counter_add+0x3e>
 211: 4c 01 6b 18           add    %r13,0x18(%rbx)
 215: 48 89 df              mov    %rbx,%rdi
 218: 41 c7 04 24 00 00 00  movl   $0x0,(%r12)
 21f: 00
 220: e8 00 00 00 00        callq  225 <__percpu_counter_add+0x52>
 225: eb 04                 jmp    22b <__percpu_counter_add+0x58>
 227: 45 89 2c 24           mov    %r13d,(%r12)
 22b: 5b                    pop    %rbx
 22c: 5b                    pop    %rbx
 22d: 41 5c                 pop    %r12
 22f: 41 5d                 pop    %r13
 231: c9                    leaveq
 232: c3                    retq

After:

00000000000001d3 <__percpu_counter_add>:
 1d3: 55                    push   %rbp
 1d4: 48 63 ca              movslq %edx,%rcx
 1d7: 48 89 e5              mov    %rsp,%rbp
 1da: 41 54                 push   %r12
 1dc: 53                    push   %rbx
 1dd: 48 89 fb              mov    %rdi,%rbx
 1e0: 48 8b 47 30           mov    0x30(%rdi),%rax
 1e4: 65 44 8b 20           mov    %gs:(%rax),%r12d
 1e8: 4d 63 e4              movslq %r12d,%r12
 1eb: 49 01 f4              add    %rsi,%r12
 1ee: 49 39 cc              cmp    %rcx,%r12
 1f1: 7d 0a                 jge    1fd <__percpu_counter_add+0x2a>
 1f3: f7 da                 neg    %edx
 1f5: 48 63 d2              movslq %edx,%rdx
 1f8: 49 39 d4              cmp    %rdx,%r12
 1fb: 7f 21                 jg     21e <__percpu_counter_add+0x4b>
 1fd: 48 89 df              mov    %rbx,%rdi
 200: e8 00 00 00 00        callq  205 <__percpu_counter_add+0x32>
 205: 4c 01 63 18           add    %r12,0x18(%rbx)
 209: 48 8b 43 30           mov    0x30(%rbx),%rax
 20d: 48 89 df              mov    %rbx,%rdi
 210: 65 c7 00 00 00 00 00  movl   $0x0,%gs:(%rax)
 217: e8 00 00 00 00        callq  21c <__percpu_counter_add+0x49>
 21c: eb 04                 jmp    222 <__percpu_counter_add+0x4f>
 21e: 65 44 89 20           mov    %r12d,%gs:(%rax)
 222: 5b                    pop    %rbx
 223: 41 5c                 pop    %r12
 225: c9                    leaveq
 226: c3                    retq

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Reviewed-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
lib/percpu_counter.c