; RUN: opt < %s -instcombine -S | FileCheck %s

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
; The selector is the runtime value %b rather than a constant, so the
; intrinsic call is expected to survive unchanged.
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT:  call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT:  ret <4 x double>
}
; In the following 4 tests, both zero mask bits of the immediate are set.
; imm = 136 (0x88): bits 3 and 7 are both set, so the whole result folds to zero.
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT:  ret <4 x double> zeroinitializer
}
; Same 0x88 immediate as above, using the <8 x float> form of the intrinsic.
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT:  ret <8 x float> zeroinitializer
}
; Same 0x88 immediate as above, using the <8 x i32> form of the intrinsic.
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %res

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT:  ret <8 x i32> zeroinitializer
}
; Same 0x88 immediate as above, using the AVX2 integer intrinsic.
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT:  ret <4 x i64> zeroinitializer
}
; The other control bits are ignored when zero mask bits of the immediate are set.
; imm = 255 (0xff): both zero bits are set together with every selector bit;
; the result is still expected to fold to zero.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT:  ret <4 x double> zeroinitializer
}
; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
; imm = 0 (0x00): both result halves come from the low half of %a0, so the
; second shuffle operand is undef.
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 1 (0x01): result low half = %a0 high half, result high half = %a0 low
; half (a half swap of %a0); the second shuffle operand is undef.
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 2 (0x02): result low half = %a1 low half, result high half = %a0 low
; half. The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 3 (0x03): result low half = %a1 high half, result high half = %a0 low
; half. The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 16 (0x10): result low half = %a0 low half, result high half = %a0 high
; half, i.e. the identity on %a0, so no shuffle is expected at all.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT:  ret <4 x double> %a0
}
; imm = 17 (0x11): both result halves come from the high half of %a0, so the
; second shuffle operand is undef.
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 18 (0x12): result low half = %a1 low half, result high half = %a0 high
; half. The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 19 (0x13): result low half = %a1 high half, result high half = %a0 high
; half. The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 32 (0x20): result low half = %a0 low half, result high half = %a1 low
; half.
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 33 (0x21): result low half = %a0 high half, result high half = %a1 low
; half.
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 34 (0x22): both result halves come from the low half of %a1, so the
; second shuffle operand is undef.
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 35 (0x23): result low half = %a1 high half, result high half = %a1 low
; half (a half swap of %a1); the second shuffle operand is undef.
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 48 (0x30): result low half = %a0 low half, result high half = %a1 high
; half.
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 49 (0x31): result low half = %a0 high half, result high half = %a1 high
; half.
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}
; imm = 50 (0x32): result low half = %a1 low half, result high half = %a1 high
; half, i.e. the identity on %a1, so no shuffle is expected at all.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT:  ret <4 x double> %a1
}
; imm = 51 (0x33): both result halves come from the high half of %a1, so the
; second shuffle operand is undef.
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
}
; Confirm that a mask for 32-bit elements is also correct.
; imm = 49 (0x31) on <8 x float>: each 128-bit half is four elements wide, so
; the expected mask takes elements 4-7 of %a0 followed by elements 4-7 of %a1
; (indices 12-15 in the concatenated shuffle input).
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT:  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:  ret <8 x float> %1
}
; Confirm that the AVX2 version works the same.
; imm = 51 (0x33) through the AVX2 integer intrinsic: both result halves come
; from the high half of %a1, so the second shuffle operand is undef.
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x i64> %1
}
; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
; imm = 129 (0x81): bit 7 zeroes the result high half; the result low half is
; the high half of %a0. The zero vector is the second shuffle operand.
; The regex stops at the ", " that precedes the mask, matching the sibling
; zero-half tests.
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT:  shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}
; imm = 131 (0x83): bit 7 zeroes the result high half; the result low half is
; the high half of %a1. The zero vector is the second shuffle operand.
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT:  shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}
; imm = 40 (0x28): bit 3 zeroes the result low half; the result high half is
; the low half of %a1. The zero vector is the first shuffle operand.
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}
; imm = 8 (0x08): bit 3 zeroes the result low half; the result high half is
; the low half of %a0. The zero vector is the first shuffle operand.
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}
; Check one more with the AVX2 version.
; imm = 40 (0x28) through the AVX2 integer intrinsic: bit 3 zeroes the result
; low half; the result high half is the low half of %a1.
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT:  shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x i64>
}
; Declarations of the four intrinsics exercised by the tests in this file:
; the three AVX vperm2f128 element-type variants and the AVX2 integer form.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone