From d7f7b7e355cc39b40235ac71bf4a41dce2c0a036 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 12 Dec 2015 12:52:52 +0000
Subject: [PATCH] [X86][AVX] Tests tidyup

Cleanup/regenerate some tests for some upcoming patches.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255432 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/2012-01-12-extract-sv.ll |  28 +++---
 test/CodeGen/X86/vec_extract-avx.ll       | 109 +++++++++++-----------
 2 files changed, 68 insertions(+), 69 deletions(-)

diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 677c902668b..92ec107a007 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -2,20 +2,20 @@
 
 define void @endless_loop() {
 ; CHECK-LABEL: endless_loop:
-; CHECK-NEXT: # BB#0:
-; CHECK-NEXT: vmovaps (%eax), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovsldup %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,2]
-; CHECK-NEXT: vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vblendps $128, %ymm1, %ymm2, %ymm1 # ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
-; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vblendps $1, %ymm0, %ymm2, %ymm0 # ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%eax)
-; CHECK-NEXT: vmovaps %ymm1, (%eax)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retl
+; CHECK-NEXT:  # BB#0:
+; CHECK-NEXT:    vmovaps (%eax), %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
+; CHECK-NEXT:    vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovaps %ymm1, (%eax)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
 entry:
   %0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
   %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32>
diff --git a/test/CodeGen/X86/vec_extract-avx.ll b/test/CodeGen/X86/vec_extract-avx.ll
index ea34fde4a0e..abb07233d35 100644
--- a/test/CodeGen/X86/vec_extract-avx.ll
+++ b/test/CodeGen/X86/vec_extract-avx.ll
@@ -1,6 +1,5 @@
-target triple = "x86_64-unknown-unknown"
-
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
 
 ; When extracting multiple consecutive elements from a larger
 ; vector into a smaller one, do it efficiently. We should use
@@ -9,6 +8,11 @@ target triple = "x86_64-unknown-unknown"
 
 ; Extracting the low elements only requires using the right kind of store.
 define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: low_v8f32_to_v4f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovaps %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x float> %v, i32 0
   %ext1 = extractelement <8 x float> %v, i32 1
   %ext2 = extractelement <8 x float> %v, i32 2
@@ -19,15 +23,15 @@ define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
   %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
   store <4 x float> %ins3, <4 x float>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: low_v8f32_to_v4f32
-; CHECK: vmovaps
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; Extracting the high elements requires just one AVX instruction.
 define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: high_v8f32_to_v4f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x float> %v, i32 4
   %ext1 = extractelement <8 x float> %v, i32 5
   %ext2 = extractelement <8 x float> %v, i32 6
@@ -38,17 +42,17 @@ define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
   %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
   store <4 x float> %ins3, <4 x float>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v8f32_to_v4f32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; Make sure element type doesn't alter the codegen. Note that
 ; if we were actually using the vector in this function and
 ; have AVX2, we should generate vextracti128 (the int version).
 define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
+; CHECK-LABEL: high_v8i32_to_v4i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x i32> %v, i32 4
   %ext1 = extractelement <8 x i32> %v, i32 5
   %ext2 = extractelement <8 x i32> %v, i32 6
@@ -59,91 +63,86 @@ define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
   %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
   store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v8i32_to_v4i32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; Make sure that element size doesn't alter the codegen.
 define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
+; CHECK-LABEL: high_v4f64_to_v2f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <4 x double> %v, i32 2
   %ext1 = extractelement <4 x double> %v, i32 3
   %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
   %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
   store <2 x double> %ins1, <2 x double>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v4f64_to_v2f64
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; PR25320 Make sure that a widened (possibly legalized) vector correctly zero-extends upper elements.
 ; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
 
 define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i32_8i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x i32>, <2 x i32>* %in, align 8
   %ext = extractelement <2 x i32> %ld, i64 0
   %ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
   store <8 x i32> %ins, <8 x i32>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2i32_8i32
-; CHECK: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i64_4i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovupd (%rdi), %xmm0
+; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT:    vmovapd %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x i64>, <2 x i64>* %in, align 8
   %ext = extractelement <2 x i64> %ld, i64 0
   %ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
   store <4 x i64> %ins, <4 x i64>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2i64_4i64
-; CHECK: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f32_8f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x float>, <2 x float>* %in, align 8
   %ext = extractelement <2 x float> %ld, i64 0
   %ins = insertelement <8 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %ext, i64 0
   store <8 x float> %ins, <8 x float>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2f32_8f32
-; CHECK: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f64_4f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovupd (%rdi), %xmm0
+; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT:    vmovapd %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x double>, <2 x double>* %in, align 8
   %ext = extractelement <2 x double> %ld, i64 0
   %ins = insertelement <4 x double> <double undef, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, double %ext, i64 0
   store <4 x double> %ins, <4 x double>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2f64_4f64
-; CHECK: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
-- 
2.34.1
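Note: the regenerated assertions above come from utils/update_llc_test_checks.py, and lit runs each test by expanding its RUN line with %s replaced by the test file path. A minimal sketch of the equivalent manual invocation (an assumption: paths are relative to an LLVM source checkout, with the built llc and FileCheck on PATH):

  llc < test/CodeGen/X86/vec_extract-avx.ll -mtriple=x86_64-unknown-unknown -mattr=+avx \
    | FileCheck test/CodeGen/X86/vec_extract-avx.ll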