; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; Reverses the 16 bytes of %a (second shuffle operand is undef) and stores the
; result to %c. Expected lowering: the shuffle mask is loaded with ld.b from a
; %lo address and applied with vshf.b using %a for both source operands.
define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_0
}
; Splats byte 1 of %a across the result. Expected lowering: the mask is built
; with ldi.b (immediate 1) and applied with vshf.b using %a for both sources.
define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_1:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_1
}
; Two-operand shuffle whose mask (indices 16..30, then 16) only references
; elements of the second operand, %b. Only the load of %b is checked, and it is
; expected to feed both vshf.b source operands.
define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_2:

  %1 = load <16 x i8>* %a
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_2
}
; General two-operand shuffle mixing bytes of %a and %b. Expected lowering:
; both vectors are loaded, the mask is loaded from a %lo address, and vshf.b
; is emitted with %a and %b as its two sources.
define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_3:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_3
}
; Both shuffle operands are %a, so mask indices 1 and 17 name the same byte.
; Expected lowering matches a splat of element 1: ldi.b (immediate 1) builds
; the mask and vshf.b uses %a for both sources.
define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: vshf_v16i8_4:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_4
}
; Reverses the 8 halfwords of %a (second shuffle operand is undef). Expected
; lowering: mask loaded with ld.h from a %lo address, then vshf.h with %a as
; both sources.
define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_0
}
; Splats halfword 1 of %a across the result. Expected lowering: ldi.h
; (immediate 1) builds the mask, then vshf.h with %a as both sources.
define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_1:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_1
}
; Two-operand shuffle whose mask (indices 8..14, then 8) only references
; elements of %b. Only the load of %b is checked, and it is expected to feed
; both vshf.h source operands.
define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_2:

  %1 = load <8 x i16>* %a
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_2
}
; General two-operand shuffle mixing halfwords of %a and %b. Expected
; lowering: both vectors loaded, mask loaded from a %lo address, and vshf.h
; emitted with %a and %b as its two sources.
define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_3:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_3
}
; Both shuffle operands are %a, so mask indices 1 and 9 name the same
; halfword. Expected lowering matches a splat of element 1: ldi.h (immediate 1)
; builds the mask and vshf.h uses %a for both sources.
define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: vshf_v8i16_4:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_4
}
; Note: a v4i32 vector consists of a single 4-element set, so it is impossible
; to get a vshf.w instruction when shuffling a single vector; those cases are
; expected to lower to shf.w instead.
; Reverses the 4 words of %a (second shuffle operand is undef). Expected
; lowering: a single shf.w with immediate 27.
define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_0
}
; Splats word 1 of %a across the result. Expected lowering: a single shf.w
; with immediate 85.
define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_1:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_1
}
; Two-operand shuffle whose mask (4, 5, 6, 4) only references elements of %b.
; Only the load of %b is checked; expected lowering is shf.w on %b with
; immediate 36.
define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_2:

  %1 = load <4 x i32>* %a
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_2
}
; Two-operand shuffle mixing words of %a and %b. Expected lowering: both
; vectors loaded, mask loaded from a %lo address, and vshf.w emitted with %a
; and %b as its two sources.
define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_3:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_3
}
; Both shuffle operands are %a, so mask indices 1 and 5 name the same word.
; Expected lowering matches a splat of element 1: shf.w with immediate 85.
define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: vshf_v4i32_4:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_4
}
; Swaps the 2 doublewords of %a (second shuffle operand is undef). Expected
; lowering: mask loaded with ld.d from a %lo address, then vshf.d with %a as
; both sources.
define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_0
}
; Splats doubleword 1 of %a across the result. Expected lowering: ldi.d
; (immediate 1) builds the mask, then vshf.d with %a as both sources.
define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_1:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_1
}
; Two-operand shuffle whose mask (3, 2) only references elements of %b. Only
; the load of %b is checked, and it is expected to feed both vshf.d source
; operands.
define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_2:

  %1 = load <2 x i64>* %a
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_2
}
; Two-operand shuffle taking element 1 of %a and element 0 of %b (mask 1, 2).
; Expected lowering: both vectors loaded, mask loaded from a %lo address, and
; vshf.d emitted with %a and %b as its two sources.
define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_3:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_3
}
; Both shuffle operands are %a, so mask indices 1 and 3 name the same
; doubleword. Expected lowering matches a splat of element 1: ldi.d
; (immediate 1) builds the mask and vshf.d uses %a for both sources.
define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: vshf_v2i64_4:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_4
}
; Applies the same permutation (1, 3, 2, 0) within every group of 4 bytes of
; %a. Expected lowering: a single shf.b with immediate 45.
define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: shf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v16i8_0
}
; Reverses each group of 4 halfwords of %a. Expected lowering: a single shf.h
; with immediate 27.
define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: shf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v8i16_0
}
; Reverses the 4 words of %a. Expected lowering: a single shf.w with
; immediate 27.
define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: shf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v4i32_0
}
; Note: shf.d does not exist, so there is no shf test for v2i64.
; Interleaves the even-indexed bytes of %a and %b (mask 0, 16, 2, 18, ...).
; Expected lowering: a single ilvev.b on the two loaded vectors.
define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v16i8_0
}
; Interleaves the even-indexed halfwords of %a and %b (mask 0, 8, 2, 10, ...).
; Expected lowering: a single ilvev.h on the two loaded vectors.
define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v8i16_0
}
; Interleaves the even-indexed words of %a and %b (mask 0, 4, 2, 6).
; Expected lowering: a single ilvev.w on the two loaded vectors.
define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v4i32_0
}
; Takes doubleword 0 of %a and doubleword 0 of %b (mask 0, 2).
; Expected lowering: a single ilvev.d on the two loaded vectors.
define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v2i64_0
}
; Interleaves the odd-indexed bytes of %a and %b (mask 1, 17, 3, 19, ...).
; Expected lowering: a single ilvod.b on the two loaded vectors.
define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v16i8_0
}
; Interleaves the odd-indexed halfwords of %a and %b (mask 1, 9, 3, 11, ...).
; Expected lowering: a single ilvod.h on the two loaded vectors.
define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v8i16_0
}
; Interleaves the odd-indexed words of %a and %b (mask 1, 5, 3, 7).
; Expected lowering: a single ilvod.w on the two loaded vectors.
define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v4i32_0
}
; Takes doubleword 1 of %a and doubleword 1 of %b (mask 1, 3).
; Expected lowering: a single ilvod.d on the two loaded vectors.
define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v2i64_0
}
; Interleaves bytes 0..7 of %a with bytes 0..7 of %b (mask 0, 16, 1, 17, ...).
; Expected lowering: a single ilvl.b on the two loaded vectors.
define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvl_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v16i8_0
}
; Interleaves halfwords 0..3 of %a with halfwords 0..3 of %b
; (mask 0, 8, 1, 9, ...). Expected lowering: a single ilvl.h.
define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvl_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v8i16_0
}
; Interleaves words 0..1 of %a with words 0..1 of %b (mask 0, 4, 1, 5).
; Expected lowering: a single ilvl.w on the two loaded vectors.
define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvl_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v4i32_0
}
; The v2i64 ilvl-style mask (0, 2) is the same mask as the ilvev case, so the
; expected instruction is ilvev.d rather than ilvl.d.
define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvl_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; ilvl.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v2i64_0
}
; Interleaves bytes 8..15 of %a with bytes 8..15 of %b
; (mask 8, 24, 9, 25, ...). Expected lowering: a single ilvr.b.
define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: ilvr_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v16i8_0
}
; Interleaves halfwords 4..7 of %a with halfwords 4..7 of %b
; (mask 4, 12, 5, 13, ...). Expected lowering: a single ilvr.h.
define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: ilvr_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v8i16_0
}
; Interleaves words 2..3 of %a with words 2..3 of %b (mask 2, 6, 3, 7).
; Expected lowering: a single ilvr.w on the two loaded vectors.
define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: ilvr_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v4i32_0
}
; The v2i64 ilvr-style mask (1, 3) is the same mask as the ilvod case, so the
; expected instruction is ilvod.d rather than ilvr.d.
define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: ilvr_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; ilvr.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v2i64_0
}
; Packs the even-indexed bytes of %a followed by those of %b
; (mask 0, 2, ..., 14, 16, 18, ..., 30). Expected lowering: a single pckev.b.
define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: pckev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v16i8_0
}
; Packs the even-indexed halfwords of %a followed by those of %b
; (mask 0, 2, 4, 6, 8, 10, 12, 14). Expected lowering: a single pckev.h.
define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: pckev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v8i16_0
}
; Packs the even-indexed words of %a followed by those of %b
; (mask 0, 2, 4, 6). Expected lowering: a single pckev.w.
define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: pckev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v4i32_0
}
; The v2i64 pckev-style mask (0, 2) is the same mask as the ilvev case, so the
; expected instruction is ilvev.d rather than pckev.d.
define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: pckev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; pckev.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v2i64_0
}
; Packs the odd-indexed bytes of %a followed by those of %b
; (mask 1, 3, ..., 15, 17, 19, ..., 31). Expected lowering: a single pckod.b.
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: pckod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v16i8_0
}
; Packs the odd-indexed halfwords of %a followed by those of %b
; (mask 1, 3, 5, 7, 9, 11, 13, 15). Expected lowering: a single pckod.h.
define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: pckod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v8i16_0
}
; Packs the odd-indexed words of %a followed by those of %b
; (mask 1, 3, 5, 7). Expected lowering: a single pckod.w.
define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: pckod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v4i32_0
}
; The v2i64 pckod-style mask (1, 3) is the same mask as the ilvod case, so the
; expected instruction is ilvod.d rather than pckod.d.
define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: pckod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; pckod.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v2i64_0
}