This is the AVX extension of r235014: add the AVX forms of the low-64-bit store (vmovlps == vmovlpd == vmovq) to the execution-domain replacement table, resolving the TODO left by that commit, and update the affected tests:
http://llvm.org/viewvc/llvm-project?view=revision&revision=235014
Review:
http://reviews.llvm.org/D8691
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235210 91177308-0d34-0410-b5e6-96231b3b80d8
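
For context (an editorial note, not part of the original commit): X86InstrInfo.cpp keeps tables of interchangeable opcodes, one column per execution domain (packed single, packed double, packed integer). The execution-domain fix pass swaps an instruction for its same-row equivalent to avoid domain-crossing stalls; see X86InstrInfo::getExecutionDomain and setExecutionDomain. Below is a minimal standalone sketch of that row lookup, using placeholder opcode values rather than LLVM's real enums:

    // Standalone sketch with hypothetical opcode values; not LLVM's API.
    #include <array>

    enum Domain { PackedSingle = 0, PackedDouble = 1, PackedInt = 2 };
    using Row = std::array<unsigned, 3>; // {PS form, PD form, integer form}

    static const Row ReplaceableSketch[] = {
        {1001, 1002, 1003}, // stands in for VMOVLPSmr / VMOVLPDmr / VMOVPQI2QImr
    };

    // Return the equivalent opcode in the target domain, or 0 if the
    // opcode has no cross-domain row.
    unsigned equivalentOpcode(unsigned Opcode, Domain Target) {
      for (const Row &R : ReplaceableSketch)
        if (R[0] == Opcode || R[1] == Opcode || R[2] == Opcode)
          return R[Target];
      return 0;
    }

The hunk below adds the previously missing AVX row for the low-64-bit store: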
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
- // TODO: Add the AVX versions of MOVLPSmr
+ { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vmovq %xmm0, (%eax)
+; CHECK-NEXT: vmovlps %xmm0, (%eax)
; CHECK-NEXT: retl
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
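
The updated expectation is sound because all three low-quadword store forms write identical bytes; only the instruction's execution domain differs. A small illustration with SSE2 intrinsics (an editorial sketch, not from the commit; the file name and compile line are arbitrary):

    // Compile with: g++ -O2 -msse2 movl_demo.cpp
    #include <emmintrin.h>
    #include <cstdio>
    #include <cstring>

    int main() {
      alignas(16) const unsigned char src[16] = {1, 2,  3,  4,  5,  6,  7,  8,
                                                 9, 10, 11, 12, 13, 14, 15, 16};
      __m128i v = _mm_load_si128(reinterpret_cast<const __m128i *>(src));

      alignas(8) unsigned char a[8], b[8], c[8];
      _mm_storel_epi64(reinterpret_cast<__m128i *>(a), v);               // movq
      _mm_storel_pi(reinterpret_cast<__m64 *>(b), _mm_castsi128_ps(v));  // movlps
      _mm_storel_pd(reinterpret_cast<double *>(c), _mm_castsi128_pd(v)); // movlpd

      // Both comparisons print 0: the three stores wrote the same 8 bytes.
      printf("%d %d\n", memcmp(a, b, 8), memcmp(a, c, 8));
      return 0;
    }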
; AVX-LABEL: store_floats:
; AVX: # BB#0:
; AVX-NEXT: vaddps %xmm0, %xmm0, %xmm0
-
-
-; !!! FIXME - the AVX version is not handled correctly.
-; AVX-NEXT: vmovq %xmm0, (%rdi)
-
-
+; AVX-NEXT: vmovlps %xmm0, (%rdi)
; AVX-NEXT: retq
%a = fadd <4 x float> %x, %x
%b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
; Swap Imm and Real.
; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
-; STRESS-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+; STRESS-NEXT: vmovlps [[RES_Vec]], ([[BASE]])
;
; Same for REGULAR: we eliminate the register bank copy for each slice.
; REGULAR-LABEL: t1:
; Swap Imm and Real.
; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
-; REGULAR-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+; REGULAR-NEXT: vmovlps [[RES_Vec]], ([[BASE]])
define void @t1(%class.Complex* nocapture %out, i64 %out_start) {
entry:
%arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
; SSE: movaps %xmm0, (%rsp)
; SSE: callq killcommon
-; AVX: vmovdqa compl+128(%rip), %xmm0
-; AVX: vmovdqa %xmm0, (%rsp)
+; AVX: vmovaps compl+128(%rip), %xmm0
+; AVX: vmovaps %xmm0, (%rsp)
; AVX: callq killcommon
@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]