From: H. Peter Anvin
Date: Mon, 17 May 2010 22:13:23 +0000 (-0700)
Subject: x86, hweight: Use a 32-bit popcnt for __arch_hweight32()
X-Git-Tag: firefly_0821_release~9833^2~2126^2
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=c59bd5688299cddb71183e156e7a3c1409b90df2;p=firefly-linux-kernel-4.4.55.git

x86, hweight: Use a 32-bit popcnt for __arch_hweight32()

Use a 32-bit popcnt instruction for __arch_hweight32(), even on
x86-64.  Even though the input register will *usually* be
zero-extended due to the standard operation of the hardware, it isn't
necessarily so if the input value was the result of truncating a
64-bit operation.

Note: the POPCNT32 variant used on x86-64 has a technically
unnecessary REX prefix to make it five bytes long, the same as a CALL
instruction, therefore avoiding an unnecessary NOP.

Reported-by: Linus Torvalds
Signed-off-by: H. Peter Anvin
Cc: Borislav Petkov
LKML-Reference:
---

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index d1fc3c219ae6..9686c3d9ff73 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,13 +2,15 @@
 #define _ASM_X86_HWEIGHT_H
 
 #ifdef CONFIG_64BIT
+/* popcnt %edi, %eax -- redundant REX prefix for alignment */
+#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
 /* popcnt %rdi, %rax */
-#define POPCNT ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
+#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
 #define REG_IN "D"
 #define REG_OUT "a"
 #else
 /* popcnt %eax, %eax */
-#define POPCNT ".byte 0xf3,0x0f,0xb8,0xc0"
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
 #define REG_IN "a"
 #define REG_OUT "a"
 #endif
@@ -23,7 +25,7 @@ static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res = 0;
 
-	asm (ALTERNATIVE("call __sw_hweight32", POPCNT, X86_FEATURE_POPCNT)
+	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
 
@@ -48,7 +50,7 @@ static inline unsigned long __arch_hweight64(__u64 w)
 	return  __arch_hweight32((u32)w) +
 		__arch_hweight32((u32)(w >> 32));
 #else
-	asm (ALTERNATIVE("call __sw_hweight64", POPCNT, X86_FEATURE_POPCNT)
+	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
 #endif /* CONFIG_X86_32 */