1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; Reverses all 16 bytes of a single v16i8 vector (second shuffle operand is undef).
; Expected code: a control vector is fetched with a %lo-relocated ld.b and that
; same register is the first operand of vshf.b and the value stored to %c; the
; vector loaded from %a (ld.b ... 0($5)) is used for both vshf.b sources.
3 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; Anchor the CHECK-DAG group below to this function's label, as every other
; test in this file does.
4 ; CHECK: vshf_v16i8_0:
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
9 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
10 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
11 store <16 x i8> %2, <16 x i8>* %c
12 ; CHECK-DAG: st.b [[R3]], 0($4)
15 ; CHECK: .size vshf_v16i8_0
; Splat of byte 1 of a single v16i8 vector (every mask index is 1).
; Expected to be selected as splati.b ...[1] rather than a vshf.b.
18 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
19 ; CHECK: vshf_v16i8_1:
21 %1 = load <16 x i8>* %a
22 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
23 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
24 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
25 store <16 x i8> %2, <16 x i8>* %c
26 ; CHECK-DAG: st.b [[R3]], 0($4)
29 ; CHECK: .size vshf_v16i8_1
; Every mask index is >= 16, so all lanes come from the second operand (%2, loaded
; from %b); the load of %a is deliberately not checked. Expected: vshf.b with the
; %b vector as both sources and a control vector loaded via %lo.
32 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
33 ; CHECK: vshf_v16i8_2:
35 %1 = load <16 x i8>* %a
36 %2 = load <16 x i8>* %b
37 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
38 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
39 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
40 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
41 store <16 x i8> %3, <16 x i8>* %c
42 ; CHECK-DAG: st.b [[R3]], 0($4)
45 ; CHECK: .size vshf_v16i8_2
; General two-input v16i8 shuffle: the mask mixes lanes from %1 (indices < 16)
; and %2 (indices >= 16). Expected: vshf.b with both loaded vectors as sources
; and a control vector loaded via %lo.
48 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
49 ; CHECK: vshf_v16i8_3:
51 %1 = load <16 x i8>* %a
52 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
53 %2 = load <16 x i8>* %b
54 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
55 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
56 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
57 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
58 store <16 x i8> %3, <16 x i8>* %c
59 ; CHECK-DAG: st.b [[R3]], 0($4)
62 ; CHECK: .size vshf_v16i8_3
; Both shuffle operands are the same value %1, so mask indices 1 and 17 name the
; same byte: the shuffle is effectively a splat of byte 1.
; Expected to be selected as splati.b ...[1].
65 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
66 ; CHECK: vshf_v16i8_4:
68 %1 = load <16 x i8>* %a
69 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
70 %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
71 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
72 store <16 x i8> %2, <16 x i8>* %c
73 ; CHECK-DAG: st.b [[R3]], 0($4)
76 ; CHECK: .size vshf_v16i8_4
; Reverses all 8 halfwords of a single v8i16 vector (second operand undef).
; Expected: control vector loaded via %lo, then vshf.h with the loaded vector
; as both sources.
79 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
80 ; CHECK: vshf_v8i16_0:
82 %1 = load <8 x i16>* %a
83 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
84 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
85 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
86 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
87 store <8 x i16> %2, <8 x i16>* %c
88 ; CHECK-DAG: st.h [[R3]], 0($4)
91 ; CHECK: .size vshf_v8i16_0
; Splat of halfword 1 of a single v8i16 vector (every mask index is 1).
; Expected to be selected as splati.h ...[1] rather than a vshf.h.
94 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
95 ; CHECK: vshf_v8i16_1:
97 %1 = load <8 x i16>* %a
98 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
99 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
100 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
101 store <8 x i16> %2, <8 x i16>* %c
102 ; CHECK-DAG: st.h [[R3]], 0($4)
105 ; CHECK: .size vshf_v8i16_1
; Every mask index is >= 8, so all lanes come from the second operand (%2, loaded
; from %b); the load of %a is deliberately not checked. Expected: vshf.h with the
; %b vector as both sources and a control vector loaded via %lo.
108 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
109 ; CHECK: vshf_v8i16_2:
111 %1 = load <8 x i16>* %a
112 %2 = load <8 x i16>* %b
113 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
114 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
115 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
116 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
117 store <8 x i16> %3, <8 x i16>* %c
118 ; CHECK-DAG: st.h [[R3]], 0($4)
121 ; CHECK: .size vshf_v8i16_2
; General two-input v8i16 shuffle: the mask mixes lanes from %1 (indices < 8)
; and %2 (indices >= 8). Expected: vshf.h with both loaded vectors as sources
; and a control vector loaded via %lo.
124 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
125 ; CHECK: vshf_v8i16_3:
127 %1 = load <8 x i16>* %a
128 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
129 %2 = load <8 x i16>* %b
130 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
131 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
132 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
133 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
134 store <8 x i16> %3, <8 x i16>* %c
135 ; CHECK-DAG: st.h [[R3]], 0($4)
138 ; CHECK: .size vshf_v8i16_3
; Both shuffle operands are the same value %1, so mask indices 1 and 9 name the
; same halfword: the shuffle is effectively a splat of halfword 1.
; Expected to be selected as splati.h ...[1].
141 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
142 ; CHECK: vshf_v8i16_4:
144 %1 = load <8 x i16>* %a
145 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
146 %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
147 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
148 store <8 x i16> %2, <8 x i16>* %c
149 ; CHECK-DAG: st.h [[R3]], 0($4)
152 ; CHECK: .size vshf_v8i16_4
155 ; Note: a v4i32 vector consists of a single 4-element set, so every shuffle of a
156 ; single v4i32 vector can be done with shf.w; vshf.w is never selected in that case.
; Reverses the four words of a single v4i32 vector.
; Expected: shf.w with immediate 27 (0b00011011), i.e. 2-bit lane selectors
; 3,2,1,0 with lane 0 in the low bits.
158 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
159 ; CHECK: vshf_v4i32_0:
161 %1 = load <4 x i32>* %a
162 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
163 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
164 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
165 store <4 x i32> %2, <4 x i32>* %c
166 ; CHECK-DAG: st.w [[R3]], 0($4)
169 ; CHECK: .size vshf_v4i32_0
; Splat of word 1 of a single v4i32 vector.
; Expected: shf.w with immediate 85 (0b01010101), i.e. every 2-bit lane
; selector is 1.
172 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
173 ; CHECK: vshf_v4i32_1:
175 %1 = load <4 x i32>* %a
176 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
177 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
178 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
179 store <4 x i32> %2, <4 x i32>* %c
180 ; CHECK-DAG: st.w [[R3]], 0($4)
183 ; CHECK: .size vshf_v4i32_1
; Every mask index is >= 4, so all lanes come from the second operand (%2, loaded
; from %b); the load of %a is deliberately not checked.
; Expected: shf.w on the %b vector with immediate 36 (0b00100100), i.e. lane
; selectors 0,1,2,0 relative to %2.
186 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
187 ; CHECK: vshf_v4i32_2:
189 %1 = load <4 x i32>* %a
190 %2 = load <4 x i32>* %b
191 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
192 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
193 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
194 store <4 x i32> %3, <4 x i32>* %c
195 ; CHECK-DAG: st.w [[R3]], 0($4)
198 ; CHECK: .size vshf_v4i32_2
; General two-input v4i32 shuffle: lane 0 comes from %1, lanes 1-3 from %2.
; This is the v4i32 case where vshf.w is expected, with both loaded vectors as
; sources and a control vector loaded via %lo.
201 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
202 ; CHECK: vshf_v4i32_3:
204 %1 = load <4 x i32>* %a
205 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
206 %2 = load <4 x i32>* %b
207 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
208 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
209 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
210 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
211 store <4 x i32> %3, <4 x i32>* %c
212 ; CHECK-DAG: st.w [[R3]], 0($4)
215 ; CHECK: .size vshf_v4i32_3
; Both shuffle operands are the same value %1, so mask indices 1 and 5 name the
; same word: the shuffle is effectively a splat of word 1.
; Expected: shf.w with immediate 85 (every 2-bit lane selector is 1).
218 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
219 ; CHECK: vshf_v4i32_4:
221 %1 = load <4 x i32>* %a
222 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
223 %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
224 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
225 store <4 x i32> %2, <4 x i32>* %c
226 ; CHECK-DAG: st.w [[R3]], 0($4)
229 ; CHECK: .size vshf_v4i32_4
; Swaps the two doublewords of a single v2i64 vector (second operand undef).
; Expected: control vector loaded via %lo, then vshf.d with the loaded vector
; as both sources.
232 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
233 ; CHECK: vshf_v2i64_0:
235 %1 = load <2 x i64>* %a
236 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
237 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
238 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
239 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
240 store <2 x i64> %2, <2 x i64>* %c
241 ; CHECK-DAG: st.d [[R3]], 0($4)
244 ; CHECK: .size vshf_v2i64_0
; Splat of doubleword 1 of a single v2i64 vector.
; Expected to be selected as splati.d ...[1] rather than a vshf.d.
247 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
248 ; CHECK: vshf_v2i64_1:
250 %1 = load <2 x i64>* %a
251 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
252 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
253 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
254 store <2 x i64> %2, <2 x i64>* %c
255 ; CHECK-DAG: st.d [[R3]], 0($4)
258 ; CHECK: .size vshf_v2i64_1
; Both mask indices are >= 2, so both lanes come from the second operand (%2,
; loaded from %b) in swapped order; the load of %a is deliberately not checked.
; Expected: vshf.d with the %b vector as both sources and a control vector
; loaded via %lo.
261 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
262 ; CHECK: vshf_v2i64_2:
264 %1 = load <2 x i64>* %a
265 %2 = load <2 x i64>* %b
266 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
267 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
268 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
269 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
270 store <2 x i64> %3, <2 x i64>* %c
271 ; CHECK-DAG: st.d [[R3]], 0($4)
274 ; CHECK: .size vshf_v2i64_2
; General two-input v2i64 shuffle: lane 0 is element 1 of %1, lane 1 is
; element 0 of %2. Expected: vshf.d with both loaded vectors as sources and a
; control vector loaded via %lo.
277 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
278 ; CHECK: vshf_v2i64_3:
280 %1 = load <2 x i64>* %a
281 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
282 %2 = load <2 x i64>* %b
283 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
284 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
285 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
286 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
287 store <2 x i64> %3, <2 x i64>* %c
288 ; CHECK-DAG: st.d [[R3]], 0($4)
291 ; CHECK: .size vshf_v2i64_3
; Both shuffle operands are the same value %1, so mask indices 1 and 3 name the
; same doubleword: the shuffle is effectively a splat of doubleword 1.
; Expected to be selected as splati.d ...[1].
294 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
295 ; CHECK: vshf_v2i64_4:
297 %1 = load <2 x i64>* %a
298 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
299 %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
300 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
301 store <2 x i64> %2, <2 x i64>* %c
302 ; CHECK-DAG: st.d [[R3]], 0($4)
305 ; CHECK: .size vshf_v2i64_4
; Applies the same 4-element permutation (1,3,2,0) to each group of four bytes
; of a single v16i8 vector.
; Expected: shf.b with immediate 45 (0b00101101), i.e. 2-bit lane selectors
; 1,3,2,0 with lane 0 in the low bits.
308 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
309 ; CHECK: shf_v16i8_0:
311 %1 = load <16 x i8>* %a
312 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
313 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
314 ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
315 store <16 x i8> %2, <16 x i8>* %c
316 ; CHECK-DAG: st.b [[R3]], 0($4)
319 ; CHECK: .size shf_v16i8_0
; Reverses each group of four halfwords of a single v8i16 vector.
; Expected: shf.h with immediate 27 (0b00011011), i.e. lane selectors 3,2,1,0.
322 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
323 ; CHECK: shf_v8i16_0:
325 %1 = load <8 x i16>* %a
326 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
327 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
328 ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
329 store <8 x i16> %2, <8 x i16>* %c
330 ; CHECK-DAG: st.h [[R3]], 0($4)
333 ; CHECK: .size shf_v8i16_0
; Reverses the four words of a single v4i32 vector.
; Expected: shf.w with immediate 27 (0b00011011), i.e. lane selectors 3,2,1,0.
336 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
337 ; CHECK: shf_v4i32_0:
339 %1 = load <4 x i32>* %a
340 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
341 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
342 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
343 store <4 x i32> %2, <4 x i32>* %c
344 ; CHECK-DAG: st.w [[R3]], 0($4)
347 ; CHECK: .size shf_v4i32_0
350 ; shf.d does not exist
; Interleaves the even-indexed bytes of %1 and %2 (mask 0,16,2,18,...,14,30).
; Expected to be selected as a single ilvev.b on the two loaded vectors.
352 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
353 ; CHECK: ilvev_v16i8_0:
355 %1 = load <16 x i8>* %a
356 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
357 %2 = load <16 x i8>* %b
358 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
359 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
360 <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
361 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
362 store <16 x i8> %3, <16 x i8>* %c
363 ; CHECK-DAG: st.b [[R3]], 0($4)
366 ; CHECK: .size ilvev_v16i8_0
; Interleaves the even-indexed halfwords of %1 and %2 (mask 0,8,2,10,4,12,6,14).
; Expected to be selected as a single ilvev.h on the two loaded vectors.
369 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
370 ; CHECK: ilvev_v8i16_0:
372 %1 = load <8 x i16>* %a
373 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
374 %2 = load <8 x i16>* %b
375 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
376 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
377 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
378 store <8 x i16> %3, <8 x i16>* %c
379 ; CHECK-DAG: st.h [[R3]], 0($4)
382 ; CHECK: .size ilvev_v8i16_0
; Interleaves the even-indexed words of %1 and %2 (mask 0,4,2,6).
; Expected to be selected as a single ilvev.w on the two loaded vectors.
385 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
386 ; CHECK: ilvev_v4i32_0:
388 %1 = load <4 x i32>* %a
389 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
390 %2 = load <4 x i32>* %b
391 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
392 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
393 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
394 store <4 x i32> %3, <4 x i32>* %c
395 ; CHECK-DAG: st.w [[R3]], 0($4)
398 ; CHECK: .size ilvev_v4i32_0
; Takes element 0 of %1 and element 0 of %2 (mask 0,2).
; Expected to be selected as a single ilvev.d on the two loaded vectors.
401 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
402 ; CHECK: ilvev_v2i64_0:
404 %1 = load <2 x i64>* %a
405 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
406 %2 = load <2 x i64>* %b
407 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
408 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
409 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
410 store <2 x i64> %3, <2 x i64>* %c
411 ; CHECK-DAG: st.d [[R3]], 0($4)
414 ; CHECK: .size ilvev_v2i64_0
; Interleaves the odd-indexed bytes of %1 and %2 (mask 1,17,3,19,...,15,31).
; Expected to be selected as a single ilvod.b on the two loaded vectors.
417 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
418 ; CHECK: ilvod_v16i8_0:
420 %1 = load <16 x i8>* %a
421 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
422 %2 = load <16 x i8>* %b
423 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
424 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
425 <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
426 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
427 store <16 x i8> %3, <16 x i8>* %c
428 ; CHECK-DAG: st.b [[R3]], 0($4)
431 ; CHECK: .size ilvod_v16i8_0
; Interleaves the odd-indexed halfwords of %1 and %2 (mask 1,9,3,11,5,13,7,15).
; Expected to be selected as a single ilvod.h on the two loaded vectors.
434 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
435 ; CHECK: ilvod_v8i16_0:
437 %1 = load <8 x i16>* %a
438 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
439 %2 = load <8 x i16>* %b
440 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
441 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
442 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
443 store <8 x i16> %3, <8 x i16>* %c
444 ; CHECK-DAG: st.h [[R3]], 0($4)
447 ; CHECK: .size ilvod_v8i16_0
; Interleaves the odd-indexed words of %1 and %2 (mask 1,5,3,7).
; Expected to be selected as a single ilvod.w on the two loaded vectors.
450 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
451 ; CHECK: ilvod_v4i32_0:
453 %1 = load <4 x i32>* %a
454 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
455 %2 = load <4 x i32>* %b
456 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
457 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
458 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
459 store <4 x i32> %3, <4 x i32>* %c
460 ; CHECK-DAG: st.w [[R3]], 0($4)
463 ; CHECK: .size ilvod_v4i32_0
; Takes element 1 of %1 and element 1 of %2 (mask 1,3).
; Expected to be selected as a single ilvod.d on the two loaded vectors.
466 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
467 ; CHECK: ilvod_v2i64_0:
469 %1 = load <2 x i64>* %a
470 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
471 %2 = load <2 x i64>* %b
472 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
473 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
474 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
475 store <2 x i64> %3, <2 x i64>* %c
476 ; CHECK-DAG: st.d [[R3]], 0($4)
479 ; CHECK: .size ilvod_v2i64_0
; Interleaves bytes 0-7 of %1 with bytes 0-7 of %2 (mask 0,16,1,17,...,7,23).
; Expected to be selected as a single ilvl.b on the two loaded vectors.
; NOTE(review): this mask uses the low-index half of each input. The MSA manual
; describes ILVL as interleaving the "left" half, which is presumably the
; upper-index half - confirm that this mask/instruction pairing is intended.
482 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
483 ; CHECK: ilvl_v16i8_0:
485 %1 = load <16 x i8>* %a
486 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
487 %2 = load <16 x i8>* %b
488 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
489 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
490 <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
491 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
492 store <16 x i8> %3, <16 x i8>* %c
493 ; CHECK-DAG: st.b [[R3]], 0($4)
496 ; CHECK: .size ilvl_v16i8_0
; Interleaves halfwords 0-3 of %1 with halfwords 0-3 of %2 (mask 0,8,1,9,2,10,3,11).
; Expected to be selected as a single ilvl.h on the two loaded vectors.
499 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
500 ; CHECK: ilvl_v8i16_0:
502 %1 = load <8 x i16>* %a
503 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
504 %2 = load <8 x i16>* %b
505 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
506 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
507 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
508 store <8 x i16> %3, <8 x i16>* %c
509 ; CHECK-DAG: st.h [[R3]], 0($4)
512 ; CHECK: .size ilvl_v8i16_0
; Interleaves words 0-1 of %1 with words 0-1 of %2 (mask 0,4,1,5).
; Expected to be selected as a single ilvl.w on the two loaded vectors.
515 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
516 ; CHECK: ilvl_v4i32_0:
518 %1 = load <4 x i32>* %a
519 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
520 %2 = load <4 x i32>* %b
521 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
522 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
523 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
524 store <4 x i32> %3, <4 x i32>* %c
525 ; CHECK-DAG: st.w [[R3]], 0($4)
528 ; CHECK: .size ilvl_v4i32_0
; Takes element 0 of %1 and element 0 of %2 (mask 0,2). With only two elements
; this is the same mask as the ilvev.d test, and ilvev.d is what the check expects.
531 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
532 ; CHECK: ilvl_v2i64_0:
534 %1 = load <2 x i64>* %a
535 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
536 %2 = load <2 x i64>* %b
537 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
538 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
539 ; ilvl.d and ilvev.d are equivalent for v2i64
540 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
541 store <2 x i64> %3, <2 x i64>* %c
542 ; CHECK-DAG: st.d [[R3]], 0($4)
545 ; CHECK: .size ilvl_v2i64_0
; Interleaves bytes 8-15 of %1 with bytes 8-15 of %2 (mask 8,24,9,25,...,15,31).
; Expected to be selected as a single ilvr.b on the two loaded vectors.
; NOTE(review): this mask uses the upper-index half of each input. The MSA manual
; describes ILVR as interleaving the "right" half, which is presumably the
; low-index half - confirm that this mask/instruction pairing is intended.
548 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
549 ; CHECK: ilvr_v16i8_0:
551 %1 = load <16 x i8>* %a
552 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
553 %2 = load <16 x i8>* %b
554 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
555 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
556 <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
557 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
558 store <16 x i8> %3, <16 x i8>* %c
559 ; CHECK-DAG: st.b [[R3]], 0($4)
562 ; CHECK: .size ilvr_v16i8_0
; Interleaves halfwords 4-7 of %1 with halfwords 4-7 of %2 (mask 4,12,5,13,6,14,7,15).
; Expected to be selected as a single ilvr.h on the two loaded vectors.
565 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
566 ; CHECK: ilvr_v8i16_0:
568 %1 = load <8 x i16>* %a
569 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
570 %2 = load <8 x i16>* %b
571 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
572 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
573 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
574 store <8 x i16> %3, <8 x i16>* %c
575 ; CHECK-DAG: st.h [[R3]], 0($4)
578 ; CHECK: .size ilvr_v8i16_0
; Interleaves words 2-3 of %1 with words 2-3 of %2 (mask 2,6,3,7).
; Expected to be selected as a single ilvr.w on the two loaded vectors.
581 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
582 ; CHECK: ilvr_v4i32_0:
584 %1 = load <4 x i32>* %a
585 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
586 %2 = load <4 x i32>* %b
587 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
588 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
589 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
590 store <4 x i32> %3, <4 x i32>* %c
591 ; CHECK-DAG: st.w [[R3]], 0($4)
594 ; CHECK: .size ilvr_v4i32_0
; Takes element 1 of %1 and element 1 of %2 (mask 1,3). With only two elements
; this is the same mask as the ilvod.d test, and ilvod.d is what the check expects.
597 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
598 ; CHECK: ilvr_v2i64_0:
600 %1 = load <2 x i64>* %a
601 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
602 %2 = load <2 x i64>* %b
603 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
604 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
605 ; ilvr.d and ilvod.d are equivalent for v2i64
606 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
607 store <2 x i64> %3, <2 x i64>* %c
608 ; CHECK-DAG: st.d [[R3]], 0($4)
611 ; CHECK: .size ilvr_v2i64_0
; Packs the even-indexed bytes of %1 followed by the even-indexed bytes of %2
; (mask 0,2,...,14,16,18,...,30).
; Expected to be selected as a single pckev.b on the two loaded vectors.
614 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
615 ; CHECK: pckev_v16i8_0:
617 %1 = load <16 x i8>* %a
618 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
619 %2 = load <16 x i8>* %b
620 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
621 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
622 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
623 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
624 store <16 x i8> %3, <16 x i8>* %c
625 ; CHECK-DAG: st.b [[R3]], 0($4)
628 ; CHECK: .size pckev_v16i8_0
; Packs the even-indexed halfwords of %1 followed by the even-indexed halfwords
; of %2 (mask 0,2,4,6,8,10,12,14).
; Expected to be selected as a single pckev.h on the two loaded vectors.
631 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
632 ; CHECK: pckev_v8i16_0:
634 %1 = load <8 x i16>* %a
635 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
636 %2 = load <8 x i16>* %b
637 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
638 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
639 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
640 store <8 x i16> %3, <8 x i16>* %c
641 ; CHECK-DAG: st.h [[R3]], 0($4)
644 ; CHECK: .size pckev_v8i16_0
; Packs the even-indexed words of %1 followed by the even-indexed words of %2
; (mask 0,2,4,6).
; Expected to be selected as a single pckev.w on the two loaded vectors.
647 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
648 ; CHECK: pckev_v4i32_0:
650 %1 = load <4 x i32>* %a
651 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
652 %2 = load <4 x i32>* %b
653 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
654 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
655 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
656 store <4 x i32> %3, <4 x i32>* %c
657 ; CHECK-DAG: st.w [[R3]], 0($4)
660 ; CHECK: .size pckev_v4i32_0
; Takes element 0 of %1 and element 0 of %2 (mask 0,2). With only two elements
; this is the same mask as the ilvev.d test, and ilvev.d is what the check expects.
663 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
664 ; CHECK: pckev_v2i64_0:
666 %1 = load <2 x i64>* %a
667 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
668 %2 = load <2 x i64>* %b
669 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
670 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
671 ; pckev.d and ilvev.d are equivalent for v2i64
672 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
673 store <2 x i64> %3, <2 x i64>* %c
674 ; CHECK-DAG: st.d [[R3]], 0($4)
677 ; CHECK: .size pckev_v2i64_0
; Packs the odd-indexed bytes of %1 followed by the odd-indexed bytes of %2
; (mask 1,3,...,15,17,19,...,31).
; Expected to be selected as a single pckod.b on the two loaded vectors.
680 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
681 ; CHECK: pckod_v16i8_0:
683 %1 = load <16 x i8>* %a
684 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
685 %2 = load <16 x i8>* %b
686 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
687 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
688 <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
689 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
690 store <16 x i8> %3, <16 x i8>* %c
691 ; CHECK-DAG: st.b [[R3]], 0($4)
694 ; CHECK: .size pckod_v16i8_0
; Packs the odd-indexed halfwords of %1 followed by the odd-indexed halfwords
; of %2 (mask 1,3,5,7,9,11,13,15).
; Expected to be selected as a single pckod.h on the two loaded vectors.
697 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
698 ; CHECK: pckod_v8i16_0:
700 %1 = load <8 x i16>* %a
701 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
702 %2 = load <8 x i16>* %b
703 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
704 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
705 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
706 store <8 x i16> %3, <8 x i16>* %c
707 ; CHECK-DAG: st.h [[R3]], 0($4)
710 ; CHECK: .size pckod_v8i16_0
; Packs the odd-indexed words of %1 followed by the odd-indexed words of %2
; (mask 1,3,5,7).
; Expected to be selected as a single pckod.w on the two loaded vectors.
713 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
714 ; CHECK: pckod_v4i32_0:
716 %1 = load <4 x i32>* %a
717 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
718 %2 = load <4 x i32>* %b
719 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
720 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
721 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
722 store <4 x i32> %3, <4 x i32>* %c
723 ; CHECK-DAG: st.w [[R3]], 0($4)
726 ; CHECK: .size pckod_v4i32_0
; Takes element 1 of %1 and element 1 of %2 (mask 1,3). With only two elements
; this is the same mask as the ilvod.d test, and ilvod.d is what the check expects.
729 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
730 ; CHECK: pckod_v2i64_0:
732 %1 = load <2 x i64>* %a
733 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
734 %2 = load <2 x i64>* %b
735 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
736 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
737 ; pckod.d and ilvod.d are equivalent for v2i64
738 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
739 store <2 x i64> %3, <2 x i64>* %c
740 ; CHECK-DAG: st.d [[R3]], 0($4)
743 ; CHECK: .size pckod_v2i64_0
; Splat of byte 4 of the vector loaded from %a (every mask index is 4).
; Expected to be selected as splati.b ...[4].
746 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
747 ; CHECK: splati_v16i8_0:
749 %1 = load <16 x i8>* %a
750 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
751 %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
752 <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
753 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
754 store <16 x i8> %2, <16 x i8>* %c
755 ; CHECK-DAG: st.b [[R3]], 0($4)
758 ; CHECK: .size splati_v16i8_0
; Splat of halfword 4 of the vector loaded from %a (every mask index is 4).
; Expected to be selected as splati.h ...[4].
761 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
762 ; CHECK: splati_v8i16_0:
764 %1 = load <8 x i16>* %a
765 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
766 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
767 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
768 store <8 x i16> %2, <8 x i16>* %c
769 ; CHECK-DAG: st.h [[R3]], 0($4)
772 ; CHECK: .size splati_v8i16_0
; Splat of word 3 of the vector loaded from %a (every mask index is 3).
; The check expects shf.w with immediate 255 (0b11111111, every 2-bit lane
; selector is 3) instead of splati.w.
775 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
776 ; CHECK: splati_v4i32_0:
778 %1 = load <4 x i32>* %a
779 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
780 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
781 ; shf.w and splati.w are equivalent
782 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
783 store <4 x i32> %2, <4 x i32>* %c
784 ; CHECK-DAG: st.w [[R3]], 0($4)
787 ; CHECK: .size splati_v4i32_0
; Splat of doubleword 1 of the vector loaded from %a (both mask indices are 1).
; Expected to be selected as splati.d ...[1].
790 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
791 ; CHECK: splati_v2i64_0:
793 %1 = load <2 x i64>* %a
794 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
795 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
796 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
797 store <2 x i64> %2, <2 x i64>* %c
798 ; CHECK-DAG: st.d [[R3]], 0($4)
801 ; CHECK: .size splati_v2i64_0