teach scev to analyze X*4|1 like X*4+c. This allows us to produce:
LBB1_1: #bb
movdqa (%esi), %xmm2
movaps %xmm2, %xmm3
punpcklbw %xmm0, %xmm3
movaps %xmm3, %xmm4
punpcklwd %xmm0, %xmm4
cvtdq2ps %xmm4, %xmm4
mulps %xmm1, %xmm4
movaps %xmm4, (%edi)
leal 1(,%eax,4), %ebx
shll $4, %ebx
punpckhwd %xmm0, %xmm3
cvtdq2ps %xmm3, %xmm3
mulps %xmm1, %xmm3
movaps %xmm3, (%edx,%ebx)
leal 2(,%eax,4), %ebx
shll $4, %ebx
punpckhbw %xmm0, %xmm2
movaps %xmm2, %xmm3
punpcklwd %xmm0, %xmm3
cvtdq2ps %xmm3, %xmm3
mulps %xmm1, %xmm3
movaps %xmm3, (%edx,%ebx)
leal 3(,%eax,4), %ebx
shll $4, %ebx
punpckhwd %xmm0, %xmm2
cvtdq2ps %xmm2, %xmm2
mulps %xmm1, %xmm2
movaps %xmm2, (%edx,%ebx)
addl $64, %edi
incl %eax
addl $16, %esi
cmpl %ecx, %eax
jne LBB1_1 #bb
instead of:
LBB1_1: #bb
movdqa (%esi), %xmm2
movaps %xmm2, %xmm3
punpcklbw %xmm0, %xmm3
movaps %xmm3, %xmm4
punpcklwd %xmm0, %xmm4
cvtdq2ps %xmm4, %xmm4
mulps %xmm1, %xmm4
movaps %xmm4, (%edi)
leal 1(,%eax,4), %ebx
shll $4, %ebx
punpckhwd %xmm0, %xmm3
cvtdq2ps %xmm3, %xmm3
mulps %xmm1, %xmm3
movaps %xmm3, (%edx,%ebx)
leal 2(,%eax,4), %ebx
shll $4, %ebx
punpckhbw %xmm0, %xmm2
movaps %xmm2, %xmm3
punpcklwd %xmm0, %xmm3
cvtdq2ps %xmm3, %xmm3
mulps %xmm1, %xmm3
movaps %xmm3, (%edx,%ebx)
leal 3(,%eax,4), %ebx
shll $4, %ebx
punpckhwd %xmm0, %xmm2
cvtdq2ps %xmm2, %xmm2
mulps %xmm1, %xmm2
movaps %xmm2, (%edx,%ebx)
addl $64, %edi
incl %eax
addl $16, %esi
cmpl %ecx, %eax
jne LBB1_1 #bb
for a testcase.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@32463
91177308-0d34-0410-b5e6-
96231b3b80d8