; Splats lane 2 of a <4 x double> loaded from %ptr (mask <2, 2, 2, 2>).
; New expectation: a single vbroadcastsd from byte offset 16 (lane 2 * 8 bytes),
; replacing the old full load + vextractf128 + vmovddup + vinsertf128 sequence.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovapd (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %ptr
%x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
; Splats lane 0 of a <4 x float> loaded from %ptr (mask <0, 0, 0, 0>).
; New expectation: vbroadcastss directly from (%rdi) instead of a
; memory-operand vpermilps.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %ptr
%x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Splats lane 7 of an <8 x float> loaded from %ptr (mask of all 7s).
; New expectation: a single vbroadcastss from byte offset 28 (lane 7 * 4 bytes),
; replacing the old load + vextractf128 + vpermilps + vinsertf128 sequence.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %ptr
%x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
; Loads a <4 x float> from %ptr; presumably splats lane 1 (per the function
; name -- the shuffle itself is outside the visible lines).
; New expectation: vbroadcastss from byte offset 4 instead of a
; memory-operand vpermilps.
; NOTE(review): `readnone` on a function that loads from %ptr looks
; contradictory -- confirm this attribute is intended.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f32_4f32_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
; Loads a <4 x float> from %ptr and returns an <8 x float>; presumably a splat
; of lane 3 widened to 8 lanes (per the function name -- shuffle not visible).
; New expectation: one 256-bit vbroadcastss from byte offset 12, replacing the
; old vpermilps + vinsertf128 pair.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_4f32_33333333:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
; Loads an <8 x float> from %ptr; presumably splats lane 5 (per the function
; name -- shuffle not visible here).
; New expectation: one vbroadcastss from byte offset 20, replacing the old
; load + vextractf128 + vpermilps + vinsertf128 sequence.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_8f32_55555555:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
; Loads a <2 x double> from %ptr and returns a <4 x double>; presumably a splat
; of lane 1 widened to 4 lanes (per the function name -- shuffle not visible).
; New expectation: one vbroadcastsd from byte offset 8, replacing the old
; load + vmovhlps + vinsertf128 sequence.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_2f64_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
; Loads a <4 x double> from %ptr; presumably splats lane 2 (per the function
; name -- shuffle not visible here).
; New expectation: one vbroadcastsd from byte offset 16, replacing the old
; load + vextractf128 + vmovddup + vinsertf128 sequence.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_4f64_2222:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovapd (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x double>, <4 x double>* %ptr
; Loads a <16 x i8> from %ptr; presumably splats byte lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: vpbroadcastb from byte offset 1, replacing the old
; vmovdqa + vpshufb pair.
; NOTE(review): `readnone` on a function that loads from %ptr looks
; contradictory -- confirm this attribute is intended.
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
; Loads a <16 x i8> from %ptr and returns a <32 x i8>; presumably a splat of
; byte lane 1 widened to 32 lanes (per the function name -- shuffle not visible).
; New expectation: one 256-bit vpbroadcastb from byte offset 1, replacing the
; old vmovdqa + vpshufb + vinserti128 sequence.
define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
; Loads a <32 x i8> from %ptr; presumably splats byte lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: one 256-bit vpbroadcastb from byte offset 1, replacing the
; old vmovdqa + vpshufb + vinserti128 sequence.
define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %ymm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <32 x i8>, <32 x i8>* %ptr
; Loads an <8 x i16> from %ptr; presumably splats word lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: vpbroadcastw from byte offset 2 (lane 1 * 2 bytes),
; replacing the old vmovdqa + vpshufb pair.
define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i16_8i16_11111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
; Loads an <8 x i16> from %ptr and returns a <16 x i16>; presumably a splat of
; word lane 1 widened to 16 lanes (per the function name -- shuffle not visible).
; New expectation: one 256-bit vpbroadcastw from byte offset 2, replacing the
; old vmovdqa + vpshufb + vinserti128 sequence.
define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
; Loads a <16 x i16> from %ptr; presumably splats word lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: one 256-bit vpbroadcastw from byte offset 2, replacing the
; old vmovdqa + vpshufb + vinserti128 sequence.
define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %ymm0
-; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <16 x i16>, <16 x i16>* %ptr
; Loads a <4 x i32> from %ptr; presumably splats lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: vbroadcastss from byte offset 4 instead of a memory-operand
; vpshufd. Note the expected mnemonic is the float-named vbroadcastss even
; though the element type is i32.
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i32_4i32_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
; Loads a <4 x i32> from %ptr and returns an <8 x i32>; presumably a splat of
; lane 3 widened to 8 lanes (per the function name -- shuffle not visible).
; New expectation: one vbroadcastss from byte offset 12, replacing the old
; load + constant-pool index broadcast + vpermd sequence.
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_4i32_33333333:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpbroadcastd LCPI15_0(%rip), %ymm1
-; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
; Loads an <8 x i32> from %ptr; presumably splats lane 5 (per the function
; name -- shuffle not visible here).
; New expectation: one vbroadcastss from byte offset 20, replacing the old
; constant-pool index broadcast + memory-operand vpermd.
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8i32_8i32_55555555:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd LCPI16_0(%rip), %ymm0
-; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <8 x i32>, <8 x i32>* %ptr
; Loads a <4 x float> from %ptr; presumably splats lane 1 (per the function
; name -- shuffle not visible here).
; New expectation: vbroadcastss from byte offset 4 instead of a
; memory-operand vpermilps.
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f32_4f32_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
; Loads a <4 x float> from %ptr and returns an <8 x float>; presumably a splat
; of lane 3 widened to 8 lanes (per the function name -- shuffle not visible).
; New expectation: one vbroadcastss from byte offset 12, replacing the old
; load + constant-pool index broadcast + vpermps sequence.
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_4f32_33333333:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1
-; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
; Loads an <8 x float> from %ptr; presumably splats lane 5 (per the function
; name -- shuffle not visible here).
; New expectation: one vbroadcastss from byte offset 20, replacing the old
; constant-pool index broadcast + memory-operand vpermps.
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_8f32_8f32_55555555:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss LCPI19_0(%rip), %ymm0
-; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
; Loads a <2 x i64> from %ptr; presumably splats lane 1 (per the function
; name -- shuffle not visible here; the name's "1111" suffix has four digits
; although the vector has only two lanes).
; New expectation: vpbroadcastq from byte offset 8 instead of a
; memory-operand vpshufd.
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_2i64_2i64_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
; CHECK-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
; Loads a <2 x i64> from %ptr and returns a <4 x i64>; presumably a splat of
; lane 1 widened to 4 lanes (per the function name -- shuffle not visible).
; New expectation: one vbroadcastsd from byte offset 8, replacing the old
; vmovdqa + vpermq pair. Note the expected mnemonic is the double-named
; vbroadcastsd even though the element type is i64.
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_2i64_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovdqa (%rdi), %xmm0
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
; Loads a <4 x i64> from %ptr; presumably splats lane 2 (per the function
; name -- shuffle not visible here).
; New expectation: vbroadcastsd from byte offset 16 instead of a
; memory-operand vpermq.
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4i64_4i64_2222:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x i64>, <4 x i64>* %ptr
; Loads a <2 x double> from %ptr and returns a <4 x double>; presumably a splat
; of lane 1 widened to 4 lanes (per the function name -- shuffle not visible).
; New expectation: one vbroadcastsd from byte offset 8, replacing the old
; vmovapd + vpermpd pair.
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_2f64_1111:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovapd (%rdi), %xmm0
-; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
; Loads a <4 x double> from %ptr; presumably splats lane 2 (per the function
; name -- shuffle not visible here).
; New expectation: vbroadcastsd from byte offset 16 instead of a
; memory-operand vpermpd.
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; CHECK-LABEL: load_splat_4f64_4f64_2222:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,2,2,2]
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
entry:
%ld = load <4 x double>, <4 x double>* %ptr