; Compile this file with llc for 32-bit x86 Darwin with AVX2 enabled and verify
; the emitted assembly against the CHECK directives below. The 32-bit target is
; named directly in the triple instead of giving an x86_64 triple and then
; overriding its architecture with -march=x86.
1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s
; Test: calls the @llvm.x86.avx2.pblendw intrinsic on two <16 x i16> operands
; with the constant i32 immediate 7 and binds the result to %res.
; NOTE(review): no CHECK directive, `ret`, or closing brace for this function is
; visible in this view -- confirm against the full file.
3 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
5 %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
8 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.pblendd.128 intrinsic on two <4 x i32> operands
; with the constant i32 immediate 7 and binds the result to %res.
; NOTE(review): no CHECK directive, `ret`, or closing brace for this function is
; visible in this view -- confirm against the full file.
11 define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
13 %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
16 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.pblendd.256 intrinsic on two <8 x i32> operands
; with the constant i32 immediate 7 and binds the result to %res.
; NOTE(review): no CHECK directive, `ret`, or closing brace for this function is
; visible in this view -- confirm against the full file.
19 define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
21 %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
24 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.mpsadbw intrinsic on two <32 x i8> operands
; with the constant i32 immediate 7; the result type is <16 x i16>.
; NOTE(review): no CHECK directive, `ret`, or closing brace for this function is
; visible in this view -- confirm against the full file.
27 define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
29 %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
32 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.psll.dq.bs intrinsic on a <4 x i64> operand
; with the constant i32 immediate 7.
; The CHECK line expects a vpslldq whose shuffle comment shows seven zero bytes
; followed by source bytes 0-8, then seven zero bytes followed by source bytes
; 16-24, i.e. the same pattern repeated for each 16-byte half of ymm0.
; NOTE(review): the `ret` and closing brace are not visible in this view.
35 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
36 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
37 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
40 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.psrl.dq.bs intrinsic on a <4 x i64> operand
; with the constant i32 immediate 7.
; The CHECK line expects a vpsrldq whose shuffle comment shows source bytes 7-15
; followed by seven zero bytes, then source bytes 23-31 followed by seven zero
; bytes, i.e. the same pattern repeated for each 16-byte half of ymm0.
; NOTE(review): the `ret` and closing brace are not visible in this view.
43 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
44 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
45 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
48 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.psll.dq intrinsic on a <4 x i64> operand with
; the constant i32 immediate 8.
; The CHECK line expects a vpslldq whose shuffle comment shows one zero byte
; followed by source bytes 0-14, then one zero byte followed by source bytes
; 16-30.
; NOTE(review): the immediate is 8 while the expected pattern moves data by one
; byte -- presumably this intrinsic's immediate is a bit count; confirm.
; NOTE(review): the `ret` and closing brace are not visible in this view.
51 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
52 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
53 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
56 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.psrl.dq intrinsic on a <4 x i64> operand with
; the constant i32 immediate 8.
; The CHECK line expects a vpsrldq whose shuffle comment shows source bytes 1-15
; followed by one zero byte, then source bytes 17-31 followed by one zero byte.
; NOTE(review): the immediate is 8 while the expected pattern moves data by one
; byte -- presumably this intrinsic's immediate is a bit count; confirm.
; NOTE(review): the `ret` and closing brace are not visible in this view.
59 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
60 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
61 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
; Declaration of the intrinsic used above; attributes are nounwind readnone.
64 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
; Test: calls the @llvm.x86.avx2.vextracti128 intrinsic on a <4 x i64> operand
; with the constant i8 immediate 7; the result type is <2 x i64>.
; The CHECK-LABEL directive anchors matching at this function's label in the
; llc output.
; NOTE(review): the instruction-level CHECK lines, `ret`, and closing brace are
; not visible in this view -- confirm against the full file.
67 define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
68 ; CHECK-LABEL: test_x86_avx2_vextracti128:
71 %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
; Declaration of the intrinsic used above; attributes are nounwind readnone.
74 declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
; Test: calls the @llvm.x86.avx2.vinserti128 intrinsic with a <4 x i64> operand,
; a <2 x i64> operand, and the constant i8 immediate 7; the result type is
; <4 x i64>.
; The CHECK-LABEL directive anchors matching at this function's label in the
; llc output.
; NOTE(review): the instruction-level CHECK lines, `ret`, and closing brace are
; not visible in this view -- confirm against the full file.
77 define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
78 ; CHECK-LABEL: test_x86_avx2_vinserti128:
81 %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
; Declaration of the intrinsic used above; attributes are nounwind readnone.
84 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
; Test: calls the @llvm.x86.avx2.vbroadcast.sd.pd.256 intrinsic on a
; <2 x double> operand; the result type is <4 x double>.
; Expected output: after this function's label, `vbroadcastsd %xmm0, %ymm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
87 define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
88 ; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
90 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
92 %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
95 declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
; Test: calls the @llvm.x86.avx2.vbroadcast.ss.ps intrinsic on a <4 x float>
; operand; the result type is also <4 x float>.
; Expected output: after this function's label, `vbroadcastss %xmm0, %xmm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
98 define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
99 ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps:
101 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
103 %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
106 declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
; Test: calls the @llvm.x86.avx2.vbroadcast.ss.ps.256 intrinsic on a
; <4 x float> operand; the result type is <8 x float>.
; Expected output: after this function's label, `vbroadcastss %xmm0, %ymm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
109 define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
110 ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
112 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
114 %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
117 declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastb.128 intrinsic on a <16 x i8>
; operand; the result type is also <16 x i8>.
; Expected output: after this function's label, `vpbroadcastb %xmm0, %xmm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
120 define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
121 ; CHECK-LABEL: test_x86_avx2_pbroadcastb_128:
123 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
125 %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
128 declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastb.256 intrinsic on a <16 x i8>
; operand; the result type is <32 x i8>.
; Expected output: after this function's label, `vpbroadcastb %xmm0, %ymm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
131 define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
132 ; CHECK-LABEL: test_x86_avx2_pbroadcastb_256:
134 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
136 %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
139 declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastw.128 intrinsic on an <8 x i16>
; operand; the result type is also <8 x i16>.
; Expected output: after this function's label, `vpbroadcastw %xmm0, %xmm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
142 define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
143 ; CHECK-LABEL: test_x86_avx2_pbroadcastw_128:
145 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
147 %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
150 declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastw.256 intrinsic on an <8 x i16>
; operand; the result type is <16 x i16>.
; Expected output: after this function's label, `vpbroadcastw %xmm0, %ymm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
153 define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
154 ; CHECK-LABEL: test_x86_avx2_pbroadcastw_256:
156 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
158 %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
161 declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastd.128 intrinsic on a <4 x i32>
; operand; the result type is also <4 x i32>.
; Expected output: after this function's label, `vbroadcastss %xmm0, %xmm0`.
; Note that the expected mnemonic is vbroadcastss, not one named after the
; pbroadcastd intrinsic.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
164 define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
165 ; CHECK-LABEL: test_x86_avx2_pbroadcastd_128:
167 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
169 %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
172 declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastd.256 intrinsic on a <4 x i32>
; operand; the result type is <8 x i32>.
; Expected output: after this function's label, `vbroadcastss %xmm0, %ymm0`.
; Note that the expected mnemonic is vbroadcastss, not one named after the
; pbroadcastd intrinsic.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
175 define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
176 ; CHECK-LABEL: test_x86_avx2_pbroadcastd_256:
178 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
180 %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
183 declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastq.128 intrinsic on a <2 x i64>
; operand; the result type is also <2 x i64>.
; Expected output: after this function's label, `vpbroadcastq %xmm0, %xmm0`.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
186 define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
187 ; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
189 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
191 %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
194 declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
; Test: calls the @llvm.x86.avx2.pbroadcastq.256 intrinsic on a <2 x i64>
; operand; the result type is <4 x i64>.
; Expected output: after this function's label, `vbroadcastsd %xmm0, %ymm0`.
; Note that the expected mnemonic is vbroadcastsd, not one named after the
; pbroadcastq intrinsic.
; NOTE(review): CHECK-NEXT must match on the line right after the previous
; match; the directive that precedes it, the `ret`, and the closing brace are
; not visible in this view.
197 define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
198 ; CHECK-LABEL: test_x86_avx2_pbroadcastq_256:
200 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
202 %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
; Declaration of the intrinsic used above; attributes are nounwind readonly.
205 declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly