From fe2a6c584a62508e7e7ab990a16bf84af51ce52e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 29 Nov 2011 05:37:58 +0000
Subject: [PATCH] Fix VINSERTF128/VEXTRACTF128 to be marked as FP
 instructions. Allow the execution dependency fix pass to convert them to
 their integer equivalents when AVX2 is enabled.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145376 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp         | 10 +++++++---
 lib/Target/X86/X86InstrSSE.td           |  4 ++--
 test/CodeGen/X86/avx-cast.ll            |  2 +-
 test/CodeGen/X86/avx-intrinsics-x86.ll  |  3 ++-
 test/CodeGen/X86/avx-splat.ll           |  2 +-
 test/CodeGen/X86/avx2-intrinsics-x86.ll | 10 ++++++++++
 6 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5d310af3eb6..b28f9c1e7c1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3568,10 +3568,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
   { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
   { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
   { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
-  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
-  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
   { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
-  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+  { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+  { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
 };
 
 // FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0be59ccd87b..1d8e3ce284d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7147,7 +7147,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
 //===----------------------------------------------------------------------===//
 // VINSERTF128 - Insert packed floating-point values
 //
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
 def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR128:$src2, i8imm:$src3),
           "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7194,7 +7194,7 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
 //===----------------------------------------------------------------------===//
 // VEXTRACTF128 - Extract packed floating-point values
 //
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
 def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
           (ins VR256:$src1, i8imm:$src2),
           "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415ea05..32d450cac9f 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
   ret <4 x double> %shuffle.i
 }
 
-; CHECK: vpxor
+; CHECK: vxorps
 ; CHECK-NEXT: vinsertf128 $0
 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 57e73121549..eccc842406a 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2140,7 +2140,8 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
 
 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
-  ; CHECK: vmovdqu
+  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force the integer domain with no 256-bit integer instructions
+  ; CHECK: vmovups
   ; add operation forces the execution domain.
   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90322e..f8522c26951 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -47,7 +47,7 @@ entry:
 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
 ; To:
 ;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
 ; CHECK-NEXT: vinsertf128 $1
 ; CHECK-NEXT: vpermilps $-1
 define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index bab7fb81e53..a0f351de760 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1046,3 +1046,13 @@ define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
   ret <8 x i32> %res
 }
 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode, where no 256-bit integer instructions are available
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+  ; CHECK: vmovdqu
+  ; add operation forces the execution domain.
+  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+  ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
-- 
2.34.1
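
Note (editorial, not part of the commit): a minimal test sketch of how the new
AVX2 mapping could be exercised directly, in the style of the .ll files above.
The function name, RUN line, and exact CHECK mnemonics are assumptions about
how llc lowers this shuffle; the sketch reuses the trick from the tests in
this patch, where an add pins the value to the integer domain so the
execution dependency fix pass should rewrite the FP-domain vextractf128 into
vextracti128 via the ReplaceableInstrsAVX2 table.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s

; The vpaddq consumer constrains the extracted value to the integer domain,
; so the execution dependency fix pass is expected to pick VEXTRACTI128rr
; over the FP-domain VEXTRACTF128rr it starts with.
; CHECK: vextracti128 $1
; CHECK: vpaddq
define <2 x i64> @extract_high_int(<4 x i64> %a) nounwind {
  ; Extract the high 128-bit lane, then do an integer-domain add on it.
  %hi = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %r = add <2 x i64> %hi, <i64 1, i64 1>
  ret <2 x i64> %r
}

With only AVX enabled, the same input should keep vextractf128: the third
column of the table is consulted only when AVX2 is available.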