From 0239b7553bd7dc0071475e9bf63effa983690666 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 7 Dec 2015 09:09:54 +0000
Subject: [PATCH] [X86][AVX] Added tests to load+broadcast non-zero'th vector elements

Baseline for an upcoming patch for PR23022

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254898 91177308-0d34-0410-b5e6-96231b3b80d8
---
 NOTE(review): this copy was repaired from a flattened extraction. Line breaks
 follow the hunk line counts; each shufflevector splat mask was rebuilt from
 the test name and its CHECK lines. Intra-line spacing/indentation is
 normalized and the author address on "From:" is absent - confirm both against
 r254898 before applying (use --ignore-whitespace if context fails to match).

 test/CodeGen/X86/avx-vbroadcast.ll | 153 +++++++++++++++++++
 test/CodeGen/X86/avx2-vbroadcast.ll | 225 +++++++++++++++++++++++++++-
 2 files changed, 375 insertions(+), 3 deletions(-)

diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index bfc9149b107..5c0f43da876 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -102,6 +102,159 @@ entry:
   ret <4 x i32> %vecinit6.i
 }
 
+; FIXME: Pointer adjusted broadcasts
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i32>, <4 x i32>* %ptr
+  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i32>, <4 x i32>* %ptr
+  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x i32>, <8 x i32>* %ptr
+  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x float>, <4 x float>* %ptr
+  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x float>, <4 x float>* %ptr
+  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x float>, <8 x float>* %ptr
+  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x i64>, <2 x i64>* %ptr
+  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x i64>, <2 x i64>* %ptr
+  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i64>, <4 x i64>* %ptr
+  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x double>, <2 x double>* %ptr
+  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x double>, <2 x double>* %ptr
+  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x double>, <4 x double>* %ptr
+  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x double> %ret
+}
+
 ; Unsupported vbroadcasts
 
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 418707cdc23..186f5087365 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -172,6 +172,225 @@ entry:
   ret <4 x i64> %q3
 }
 
+; FIXME: Pointer adjusted broadcasts
+
+define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <16 x i8>, <16 x i8>* %ptr
+  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <16 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <16 x i8>, <16 x i8>* %ptr
+  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <32 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <32 x i8>, <32 x i8>* %ptr
+  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <32 x i8> %ret
+}
+
+define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i16_8i16_11111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x i16>, <8 x i16>* %ptr
+  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x i16>, <8 x i16>* %ptr
+  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <16 x i16>, <16 x i16>* %ptr
+  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <16 x i16> %ret
+}
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i32>, <4 x i32>* %ptr
+  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpbroadcastd LCPI15_0(%rip), %ymm1
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i32>, <4 x i32>* %ptr
+  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd LCPI16_0(%rip), %ymm0
+; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x i32>, <8 x i32>* %ptr
+  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x float>, <4 x float>* %ptr
+  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1
+; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x float>, <4 x float>* %ptr
+  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss LCPI19_0(%rip), %ymm0
+; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %ld = load <8 x float>, <8 x float>* %ptr
+  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x i64>, <2 x i64>* %ptr
+  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x i64>, <2 x i64>* %ptr
+  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x i64>, <4 x i64>* %ptr
+  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x double>, <2 x double>* %ptr
+  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <2 x double>, <2 x double>* %ptr
+  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: retq
+entry:
+  %ld = load <4 x double>, <4 x double>* %ptr
+  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x double> %ret
+}
+
 ; make sure that we still don't support broadcast double into 128-bit vector
 ; this used to crash
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
@@ -242,13 +461,13 @@ define void @crash() nounwind alwaysinline {
 ; CHECK: ## BB#0: ## %WGLoopsEntry
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB13_1
+; CHECK-NEXT: je LBB31_1
 ; CHECK-NEXT: ## BB#2: ## %ret
 ; CHECK-NEXT: retq
 ; CHECK-NEXT: .align 4, 0x90
-; CHECK-NEXT: LBB13_1: ## %footer349VF
+; CHECK-NEXT: LBB31_1: ## %footer349VF
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp LBB13_1
+; CHECK-NEXT: jmp LBB31_1
 WGLoopsEntry:
   br i1 undef, label %ret, label %footer329VF
 
-- 
2.34.1