Lower X%C into X/C+stuff. This allows the 'division by a constant' logic to
apply to rems as well as divs. This fixes PR945 and speeds up ReedSolomon
from 14.57s to 10.90s (which is now faster than gcc).
It compiles CodeGen/X86/rem.ll into:
_test1:
subl $4, %esp
movl %esi, (%esp)
movl $
2155905153, %ecx
movl 8(%esp), %esi
movl %esi, %eax
imull %ecx
addl %esi, %edx
movl %edx, %eax
shrl $31, %eax
sarl $7, %edx
addl %eax, %edx
imull $255, %edx, %eax
subl %eax, %esi
movl %esi, %eax
movl (%esp), %esi
addl $4, %esp
ret
_test2:
movl 4(%esp), %eax
movl %eax, %ecx
sarl $31, %ecx
shrl $24, %ecx
addl %eax, %ecx
andl $
4294967040, %ecx
subl %ecx, %eax
ret
_test3:
subl $4, %esp
movl %esi, (%esp)
movl $
2155905153, %ecx
movl 8(%esp), %esi
movl %esi, %eax
mull %ecx
shrl $7, %edx
imull $255, %edx, %eax
subl %eax, %esi
movl %esi, %eax
movl (%esp), %esi
addl $4, %esp
ret
instead of div/idiv instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30920
91177308-0d34-0410-b5e6-
96231b3b80d8