1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
; Single-source byte shuffle that reverses all 16 lanes of %a (the second
; shuffle operand is undef). The expected code is a ld.b from a %lo address
; (presumably the constant shuffle mask), a vshf.b that uses the loaded %a
; register for both data operands, and a st.b of the result through %c.
4 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; Anchor the directives below to this function's label, as the sibling tests do.
; CHECK: vshf_v16i8_0:
7 %1 = load <16 x i8>* %a
8 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
11 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
12 store <16 x i8> %2, <16 x i8>* %c
13 ; CHECK-DAG: st.b [[R3]], 0($4)
16 ; CHECK: .size vshf_v16i8_0
; Single-source byte shuffle whose mask is all 1s, i.e. a splat of lane 1 of
; %a. The expected code is splati.b with index 1 rather than a vshf.b.
19 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
20 ; CHECK: vshf_v16i8_1:
22 %1 = load <16 x i8>* %a
23 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
24 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
25 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
26 store <16 x i8> %2, <16 x i8>* %c
27 ; CHECK-DAG: st.b [[R3]], 0($4)
30 ; CHECK: .size vshf_v16i8_1
; Two-operand byte shuffle whose mask indices are all >= 16, so only lanes of
; %b are selected and the load of %a is not checked. The expected vshf.b uses
; the %b register for both data operands.
33 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
34 ; CHECK: vshf_v16i8_2:
36 %1 = load <16 x i8>* %a
37 %2 = load <16 x i8>* %b
38 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
39 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
40 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
41 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
42 store <16 x i8> %3, <16 x i8>* %c
43 ; CHECK-DAG: st.b [[R3]], 0($4)
46 ; CHECK: .size vshf_v16i8_2
; Two-operand byte shuffle whose mask mixes lanes of %a (indices < 16) and %b
; (indices >= 16). The expected code is a general vshf.b taking both loaded
; registers as data operands.
49 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
50 ; CHECK: vshf_v16i8_3:
52 %1 = load <16 x i8>* %a
53 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
54 %2 = load <16 x i8>* %b
55 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
56 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
57 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
58 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
59 store <16 x i8> %3, <16 x i8>* %c
60 ; CHECK-DAG: st.b [[R3]], 0($4)
63 ; CHECK: .size vshf_v16i8_3
; Both shuffle operands are the same value %1, so mask indices 1 and 17 name
; the same lane. The shuffle is therefore a splat of lane 1 and the expected
; code is splati.b with index 1.
66 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
67 ; CHECK: vshf_v16i8_4:
69 %1 = load <16 x i8>* %a
70 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
71 %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
72 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
73 store <16 x i8> %2, <16 x i8>* %c
74 ; CHECK-DAG: st.b [[R3]], 0($4)
77 ; CHECK: .size vshf_v16i8_4
; Single-source halfword shuffle that reverses all 8 lanes of %a. The expected
; code is a ld.h from a %lo address (presumably the constant shuffle mask) and
; a vshf.h that uses the %a register for both data operands.
80 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
81 ; CHECK: vshf_v8i16_0:
83 %1 = load <8 x i16>* %a
84 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
85 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
86 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
87 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
88 store <8 x i16> %2, <8 x i16>* %c
89 ; CHECK-DAG: st.h [[R3]], 0($4)
92 ; CHECK: .size vshf_v8i16_0
; Single-source halfword shuffle whose mask is all 1s, i.e. a splat of lane 1
; of %a. The expected code is splati.h with index 1 rather than a vshf.h.
95 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
96 ; CHECK: vshf_v8i16_1:
98 %1 = load <8 x i16>* %a
99 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
100 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
101 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
102 store <8 x i16> %2, <8 x i16>* %c
103 ; CHECK-DAG: st.h [[R3]], 0($4)
106 ; CHECK: .size vshf_v8i16_1
; Two-operand halfword shuffle whose mask indices are all >= 8, so only lanes
; of %b are selected and the load of %a is not checked. The expected vshf.h
; uses the %b register for both data operands.
109 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
110 ; CHECK: vshf_v8i16_2:
112 %1 = load <8 x i16>* %a
113 %2 = load <8 x i16>* %b
114 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
115 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
116 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
117 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
118 store <8 x i16> %3, <8 x i16>* %c
119 ; CHECK-DAG: st.h [[R3]], 0($4)
122 ; CHECK: .size vshf_v8i16_2
; Two-operand halfword shuffle whose mask mixes lanes of %a (indices < 8) and
; %b (indices >= 8). The expected code is a general vshf.h taking both loaded
; registers as data operands.
125 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
126 ; CHECK: vshf_v8i16_3:
128 %1 = load <8 x i16>* %a
129 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
130 %2 = load <8 x i16>* %b
131 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
132 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
133 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
134 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
135 store <8 x i16> %3, <8 x i16>* %c
136 ; CHECK-DAG: st.h [[R3]], 0($4)
139 ; CHECK: .size vshf_v8i16_3
; Both shuffle operands are the same value %1, so mask indices 1 and 9 name
; the same lane. The shuffle is therefore a splat of lane 1 and the expected
; code is splati.h with index 1.
142 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
143 ; CHECK: vshf_v8i16_4:
145 %1 = load <8 x i16>* %a
146 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
147 %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
148 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
149 store <8 x i16> %2, <8 x i16>* %c
150 ; CHECK-DAG: st.h [[R3]], 0($4)
153 ; CHECK: .size vshf_v8i16_4
156 ; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
157 ; instruction when using a single vector.
; Single-source word shuffle that reverses the 4 lanes of %a. The expected
; code is shf.w with immediate 27 (0b00011011): two bits per result lane with
; lane 0 in the lowest bits, giving source lanes 3, 2, 1, 0.
159 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160 ; CHECK: vshf_v4i32_0:
162 %1 = load <4 x i32>* %a
163 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
165 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
166 store <4 x i32> %2, <4 x i32>* %c
167 ; CHECK-DAG: st.w [[R3]], 0($4)
170 ; CHECK: .size vshf_v4i32_0
; Single-source word shuffle that splats lane 1 of %a. The expected code is
; shf.w with immediate 85 (0b01010101, lane 1 in every two-bit field) rather
; than a splati.w.
173 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
174 ; CHECK: vshf_v4i32_1:
176 %1 = load <4 x i32>* %a
177 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
178 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
179 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
180 store <4 x i32> %2, <4 x i32>* %c
181 ; CHECK-DAG: st.w [[R3]], 0($4)
184 ; CHECK: .size vshf_v4i32_1
; Two-operand word shuffle whose mask <4, 5, 6, 4> selects only lanes
; 0, 1, 2, 0 of %b, so the load of %a is not checked. The expected code is
; shf.w on the %b register with immediate 36 (0b00100100).
187 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
188 ; CHECK: vshf_v4i32_2:
190 %1 = load <4 x i32>* %a
191 %2 = load <4 x i32>* %b
192 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
193 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
194 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
195 store <4 x i32> %3, <4 x i32>* %c
196 ; CHECK-DAG: st.w [[R3]], 0($4)
199 ; CHECK: .size vshf_v4i32_2
; Two-operand word shuffle whose mask mixes lane 1 of %a with lanes of %b
; (indices >= 4). The expected code is a general vshf.w taking both loaded
; registers as data operands.
202 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
203 ; CHECK: vshf_v4i32_3:
205 %1 = load <4 x i32>* %a
206 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
207 %2 = load <4 x i32>* %b
208 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
209 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
210 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
211 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
212 store <4 x i32> %3, <4 x i32>* %c
213 ; CHECK-DAG: st.w [[R3]], 0($4)
216 ; CHECK: .size vshf_v4i32_3
; Both shuffle operands are the same value %1, so mask indices 1 and 5 name
; the same lane. The shuffle is a splat of lane 1 and the expected code is
; shf.w with immediate 85 (0b01010101).
219 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
220 ; CHECK: vshf_v4i32_4:
222 %1 = load <4 x i32>* %a
223 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224 %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
225 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
226 store <4 x i32> %2, <4 x i32>* %c
227 ; CHECK-DAG: st.w [[R3]], 0($4)
230 ; CHECK: .size vshf_v4i32_4
; Single-source doubleword shuffle that swaps the two lanes of %a. The
; expected code is a ld.d from a %lo address (presumably the constant shuffle
; mask) and a vshf.d that uses the %a register for both data operands.
233 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
234 ; CHECK: vshf_v2i64_0:
236 %1 = load <2 x i64>* %a
237 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
238 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
239 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
240 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
241 store <2 x i64> %2, <2 x i64>* %c
242 ; CHECK-DAG: st.d [[R3]], 0($4)
245 ; CHECK: .size vshf_v2i64_0
; Single-source doubleword shuffle that splats lane 1 of %a. The expected
; code is splati.d with index 1 rather than a vshf.d.
248 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
249 ; CHECK: vshf_v2i64_1:
251 %1 = load <2 x i64>* %a
252 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
253 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
254 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
255 store <2 x i64> %2, <2 x i64>* %c
256 ; CHECK-DAG: st.d [[R3]], 0($4)
259 ; CHECK: .size vshf_v2i64_1
; Two-operand doubleword shuffle whose mask <3, 2> selects only lanes of %b
; (swapped), so the load of %a is not checked. The expected vshf.d uses the %b
; register for both data operands.
262 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
263 ; CHECK: vshf_v2i64_2:
265 %1 = load <2 x i64>* %a
266 %2 = load <2 x i64>* %b
267 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
268 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
269 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
270 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
271 store <2 x i64> %3, <2 x i64>* %c
272 ; CHECK-DAG: st.d [[R3]], 0($4)
275 ; CHECK: .size vshf_v2i64_2
; Two-operand doubleword shuffle taking lane 1 of %a and lane 0 of %b (mask
; <1, 2>). The expected code is a general vshf.d taking both loaded registers
; as data operands.
278 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
279 ; CHECK: vshf_v2i64_3:
281 %1 = load <2 x i64>* %a
282 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
283 %2 = load <2 x i64>* %b
284 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
285 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
286 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
287 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
288 store <2 x i64> %3, <2 x i64>* %c
289 ; CHECK-DAG: st.d [[R3]], 0($4)
292 ; CHECK: .size vshf_v2i64_3
; Both shuffle operands are the same value %1, so mask indices 1 and 3 name
; the same lane. The shuffle is a splat of lane 1 and the expected code is
; splati.d with index 1.
295 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
296 ; CHECK: vshf_v2i64_4:
298 %1 = load <2 x i64>* %a
299 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300 %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
301 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
302 store <2 x i64> %2, <2 x i64>* %c
303 ; CHECK-DAG: st.d [[R3]], 0($4)
306 ; CHECK: .size vshf_v2i64_4
; Single-source byte shuffle that applies the same permutation 1, 3, 2, 0
; inside every group of four lanes. The expected code is shf.b with immediate
; 45 (0b00101101): two bits per lane of the group, lane 0 in the lowest bits.
309 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
310 ; CHECK: shf_v16i8_0:
312 %1 = load <16 x i8>* %a
313 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
314 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
315 ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
316 store <16 x i8> %2, <16 x i8>* %c
317 ; CHECK-DAG: st.b [[R3]], 0($4)
320 ; CHECK: .size shf_v16i8_0
; Single-source halfword shuffle that reverses the lanes inside each group of
; four (3, 2, 1, 0 and 7, 6, 5, 4). The expected code is shf.h with immediate
; 27 (0b00011011).
323 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
324 ; CHECK: shf_v8i16_0:
326 %1 = load <8 x i16>* %a
327 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
328 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
329 ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
330 store <8 x i16> %2, <8 x i16>* %c
331 ; CHECK-DAG: st.h [[R3]], 0($4)
334 ; CHECK: .size shf_v8i16_0
; Single-source word shuffle that reverses the 4 lanes of %a. The expected
; code is shf.w with immediate 27 (0b00011011), the same pattern and
; expectation as vshf_v4i32_0.
337 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
338 ; CHECK: shf_v4i32_0:
340 %1 = load <4 x i32>* %a
341 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
342 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
343 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
344 store <4 x i32> %2, <4 x i32>* %c
345 ; CHECK-DAG: st.w [[R3]], 0($4)
348 ; CHECK: .size shf_v4i32_0
351 ; shf.d does not exist
; Byte shuffle that interleaves the even-indexed lanes of %a and %b: result
; lanes alternate %a[0], %b[0], %a[2], %b[2], ... The expected code is a
; single ilvev.b on the two loaded registers.
353 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
354 ; CHECK: ilvev_v16i8_0:
356 %1 = load <16 x i8>* %a
357 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
358 %2 = load <16 x i8>* %b
359 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
360 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
361 <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
362 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
363 store <16 x i8> %3, <16 x i8>* %c
364 ; CHECK-DAG: st.b [[R3]], 0($4)
367 ; CHECK: .size ilvev_v16i8_0
; Halfword shuffle that interleaves the even-indexed lanes of %a and %b
; (%a[0], %b[0], %a[2], %b[2], ...). The expected code is a single ilvev.h on
; the two loaded registers.
370 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
371 ; CHECK: ilvev_v8i16_0:
373 %1 = load <8 x i16>* %a
374 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
375 %2 = load <8 x i16>* %b
376 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
377 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
378 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
379 store <8 x i16> %3, <8 x i16>* %c
380 ; CHECK-DAG: st.h [[R3]], 0($4)
383 ; CHECK: .size ilvev_v8i16_0
; Word shuffle that interleaves the even-indexed lanes of %a and %b
; (%a[0], %b[0], %a[2], %b[2]). The expected code is a single ilvev.w on the
; two loaded registers.
386 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
387 ; CHECK: ilvev_v4i32_0:
389 %1 = load <4 x i32>* %a
390 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
391 %2 = load <4 x i32>* %b
392 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
393 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
394 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
395 store <4 x i32> %3, <4 x i32>* %c
396 ; CHECK-DAG: st.w [[R3]], 0($4)
399 ; CHECK: .size ilvev_v4i32_0
; Doubleword shuffle that takes lane 0 of %a and lane 0 of %b (mask <0, 2>).
; The expected code is a single ilvev.d on the two loaded registers.
402 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
403 ; CHECK: ilvev_v2i64_0:
405 %1 = load <2 x i64>* %a
406 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
407 %2 = load <2 x i64>* %b
408 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
409 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
410 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
411 store <2 x i64> %3, <2 x i64>* %c
412 ; CHECK-DAG: st.d [[R3]], 0($4)
415 ; CHECK: .size ilvev_v2i64_0
; Byte shuffle that interleaves the odd-indexed lanes of %a and %b: result
; lanes alternate %a[1], %b[1], %a[3], %b[3], ... The expected code is a
; single ilvod.b on the two loaded registers.
418 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
419 ; CHECK: ilvod_v16i8_0:
421 %1 = load <16 x i8>* %a
422 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
423 %2 = load <16 x i8>* %b
424 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
425 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
426 <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
427 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
428 store <16 x i8> %3, <16 x i8>* %c
429 ; CHECK-DAG: st.b [[R3]], 0($4)
432 ; CHECK: .size ilvod_v16i8_0
; Halfword shuffle that interleaves the odd-indexed lanes of %a and %b
; (%a[1], %b[1], %a[3], %b[3], ...). The expected code is a single ilvod.h on
; the two loaded registers.
435 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
436 ; CHECK: ilvod_v8i16_0:
438 %1 = load <8 x i16>* %a
439 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
440 %2 = load <8 x i16>* %b
441 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
442 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
443 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
444 store <8 x i16> %3, <8 x i16>* %c
445 ; CHECK-DAG: st.h [[R3]], 0($4)
448 ; CHECK: .size ilvod_v8i16_0
; Word shuffle that interleaves the odd-indexed lanes of %a and %b
; (%a[1], %b[1], %a[3], %b[3]). The expected code is a single ilvod.w on the
; two loaded registers.
451 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
452 ; CHECK: ilvod_v4i32_0:
454 %1 = load <4 x i32>* %a
455 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
456 %2 = load <4 x i32>* %b
457 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
458 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
459 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
460 store <4 x i32> %3, <4 x i32>* %c
461 ; CHECK-DAG: st.w [[R3]], 0($4)
464 ; CHECK: .size ilvod_v4i32_0
; Doubleword shuffle that takes lane 1 of %a and lane 1 of %b (mask <1, 3>).
; The expected code is a single ilvod.d on the two loaded registers.
467 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
468 ; CHECK: ilvod_v2i64_0:
470 %1 = load <2 x i64>* %a
471 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
472 %2 = load <2 x i64>* %b
473 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
474 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
475 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
476 store <2 x i64> %3, <2 x i64>* %c
477 ; CHECK-DAG: st.d [[R3]], 0($4)
480 ; CHECK: .size ilvod_v2i64_0
; Byte shuffle that interleaves lanes 0..7 of %a with lanes 0..7 of %b
; (%a[0], %b[0], %a[1], %b[1], ...). The expected code is a single ilvl.b on
; the two loaded registers.
483 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
484 ; CHECK: ilvl_v16i8_0:
486 %1 = load <16 x i8>* %a
487 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
488 %2 = load <16 x i8>* %b
489 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
490 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
491 <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
492 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
493 store <16 x i8> %3, <16 x i8>* %c
494 ; CHECK-DAG: st.b [[R3]], 0($4)
497 ; CHECK: .size ilvl_v16i8_0
; Halfword shuffle that interleaves lanes 0..3 of %a with lanes 0..3 of %b
; (%a[0], %b[0], %a[1], %b[1], ...). The expected code is a single ilvl.h on
; the two loaded registers.
500 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
501 ; CHECK: ilvl_v8i16_0:
503 %1 = load <8 x i16>* %a
504 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
505 %2 = load <8 x i16>* %b
506 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
507 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
508 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
509 store <8 x i16> %3, <8 x i16>* %c
510 ; CHECK-DAG: st.h [[R3]], 0($4)
513 ; CHECK: .size ilvl_v8i16_0
; Word shuffle that interleaves lanes 0..1 of %a with lanes 0..1 of %b
; (%a[0], %b[0], %a[1], %b[1]). The expected code is a single ilvl.w on the
; two loaded registers.
516 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
517 ; CHECK: ilvl_v4i32_0:
519 %1 = load <4 x i32>* %a
520 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
521 %2 = load <4 x i32>* %b
522 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
523 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
524 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
525 store <4 x i32> %3, <4 x i32>* %c
526 ; CHECK-DAG: st.w [[R3]], 0($4)
529 ; CHECK: .size ilvl_v4i32_0
; Doubleword shuffle with mask <0, 2>, the same mask used by ilvev_v2i64_0.
; The test expects ilvev.d here instead of ilvl.d, as the inline note below
; explains.
532 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
533 ; CHECK: ilvl_v2i64_0:
535 %1 = load <2 x i64>* %a
536 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
537 %2 = load <2 x i64>* %b
538 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
539 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
540 ; ilvl.d and ilvev.d are equivalent for v2i64
541 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
542 store <2 x i64> %3, <2 x i64>* %c
543 ; CHECK-DAG: st.d [[R3]], 0($4)
546 ; CHECK: .size ilvl_v2i64_0
; Byte shuffle that interleaves lanes 8..15 of %a with lanes 8..15 of %b
; (%a[8], %b[8], %a[9], %b[9], ...). The expected code is a single ilvr.b on
; the two loaded registers.
549 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
550 ; CHECK: ilvr_v16i8_0:
552 %1 = load <16 x i8>* %a
553 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
554 %2 = load <16 x i8>* %b
555 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
556 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
557 <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
558 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
559 store <16 x i8> %3, <16 x i8>* %c
560 ; CHECK-DAG: st.b [[R3]], 0($4)
563 ; CHECK: .size ilvr_v16i8_0
; Halfword shuffle that interleaves lanes 4..7 of %a with lanes 4..7 of %b
; (%a[4], %b[4], %a[5], %b[5], ...). The expected code is a single ilvr.h on
; the two loaded registers.
566 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
567 ; CHECK: ilvr_v8i16_0:
569 %1 = load <8 x i16>* %a
570 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
571 %2 = load <8 x i16>* %b
572 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
573 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
574 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
575 store <8 x i16> %3, <8 x i16>* %c
576 ; CHECK-DAG: st.h [[R3]], 0($4)
579 ; CHECK: .size ilvr_v8i16_0
; Word shuffle that interleaves lanes 2..3 of %a with lanes 2..3 of %b
; (%a[2], %b[2], %a[3], %b[3]). The expected code is a single ilvr.w on the
; two loaded registers.
582 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
583 ; CHECK: ilvr_v4i32_0:
585 %1 = load <4 x i32>* %a
586 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
587 %2 = load <4 x i32>* %b
588 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
589 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
590 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
591 store <4 x i32> %3, <4 x i32>* %c
592 ; CHECK-DAG: st.w [[R3]], 0($4)
595 ; CHECK: .size ilvr_v4i32_0
; Doubleword shuffle with mask <1, 3>, the same mask used by ilvod_v2i64_0.
; The test expects ilvod.d here instead of ilvr.d, as the inline note below
; explains.
598 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
599 ; CHECK: ilvr_v2i64_0:
601 %1 = load <2 x i64>* %a
602 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
603 %2 = load <2 x i64>* %b
604 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
605 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
606 ; ilvr.d and ilvod.d are equivalent for v2i64
607 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
608 store <2 x i64> %3, <2 x i64>* %c
609 ; CHECK-DAG: st.d [[R3]], 0($4)
612 ; CHECK: .size ilvr_v2i64_0
; Byte shuffle that packs the even-indexed lanes of %a (indices 0, 2, ..., 14)
; followed by the even-indexed lanes of %b (indices 16, 18, ..., 30). The
; expected code is a single pckev.b on the two loaded registers.
615 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
616 ; CHECK: pckev_v16i8_0:
618 %1 = load <16 x i8>* %a
619 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
620 %2 = load <16 x i8>* %b
621 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
622 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
623 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
624 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
625 store <16 x i8> %3, <16 x i8>* %c
626 ; CHECK-DAG: st.b [[R3]], 0($4)
629 ; CHECK: .size pckev_v16i8_0
; Halfword shuffle that packs the even-indexed lanes of %a (0, 2, 4, 6)
; followed by the even-indexed lanes of %b (8, 10, 12, 14). The expected code
; is a single pckev.h on the two loaded registers.
632 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
633 ; CHECK: pckev_v8i16_0:
635 %1 = load <8 x i16>* %a
636 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
637 %2 = load <8 x i16>* %b
638 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
639 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
640 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
641 store <8 x i16> %3, <8 x i16>* %c
642 ; CHECK-DAG: st.h [[R3]], 0($4)
645 ; CHECK: .size pckev_v8i16_0
; Word shuffle that packs the even-indexed lanes of %a (0, 2) followed by the
; even-indexed lanes of %b (4, 6). The expected code is a single pckev.w on
; the two loaded registers.
648 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
649 ; CHECK: pckev_v4i32_0:
651 %1 = load <4 x i32>* %a
652 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
653 %2 = load <4 x i32>* %b
654 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
655 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
656 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
657 store <4 x i32> %3, <4 x i32>* %c
658 ; CHECK-DAG: st.w [[R3]], 0($4)
661 ; CHECK: .size pckev_v4i32_0
; Doubleword shuffle with mask <0, 2>, the same mask used by ilvev_v2i64_0.
; The test expects ilvev.d here instead of pckev.d, as the inline note below
; explains.
664 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
665 ; CHECK: pckev_v2i64_0:
667 %1 = load <2 x i64>* %a
668 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
669 %2 = load <2 x i64>* %b
670 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
671 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
672 ; pckev.d and ilvev.d are equivalent for v2i64
673 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
674 store <2 x i64> %3, <2 x i64>* %c
675 ; CHECK-DAG: st.d [[R3]], 0($4)
678 ; CHECK: .size pckev_v2i64_0
; Byte shuffle that packs the odd-indexed lanes of %a (indices 1, 3, ..., 15)
; followed by the odd-indexed lanes of %b (indices 17, 19, ..., 31). The
; expected code is a single pckod.b on the two loaded registers.
681 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
682 ; CHECK: pckod_v16i8_0:
684 %1 = load <16 x i8>* %a
685 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
686 %2 = load <16 x i8>* %b
687 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
688 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
689 <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
690 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
691 store <16 x i8> %3, <16 x i8>* %c
692 ; CHECK-DAG: st.b [[R3]], 0($4)
695 ; CHECK: .size pckod_v16i8_0
; Halfword shuffle that packs the odd-indexed lanes of %a (1, 3, 5, 7)
; followed by the odd-indexed lanes of %b (9, 11, 13, 15). The expected code
; is a single pckod.h on the two loaded registers.
698 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
699 ; CHECK: pckod_v8i16_0:
701 %1 = load <8 x i16>* %a
702 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
703 %2 = load <8 x i16>* %b
704 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
705 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
706 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
707 store <8 x i16> %3, <8 x i16>* %c
708 ; CHECK-DAG: st.h [[R3]], 0($4)
711 ; CHECK: .size pckod_v8i16_0
; Word shuffle that packs the odd-indexed lanes of %a (1, 3) followed by the
; odd-indexed lanes of %b (5, 7). The expected code is a single pckod.w on the
; two loaded registers.
714 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
715 ; CHECK: pckod_v4i32_0:
717 %1 = load <4 x i32>* %a
718 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
719 %2 = load <4 x i32>* %b
720 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
721 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
722 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
723 store <4 x i32> %3, <4 x i32>* %c
724 ; CHECK-DAG: st.w [[R3]], 0($4)
727 ; CHECK: .size pckod_v4i32_0
; Doubleword shuffle with mask <1, 3>, the same mask used by ilvod_v2i64_0.
; The test expects ilvod.d here instead of pckod.d, as the inline note below
; explains.
730 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
731 ; CHECK: pckod_v2i64_0:
733 %1 = load <2 x i64>* %a
734 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
735 %2 = load <2 x i64>* %b
736 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
737 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
738 ; pckod.d and ilvod.d are equivalent for v2i64
739 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
740 store <2 x i64> %3, <2 x i64>* %c
741 ; CHECK-DAG: st.d [[R3]], 0($4)
744 ; CHECK: .size pckod_v2i64_0
; Single-source byte shuffle that splats lane 4 of %a (every mask index is 4).
; The expected code is splati.b with index 4. Unlike the tests above, this
; function takes only %c and %a.
747 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
748 ; CHECK: splati_v16i8_0:
750 %1 = load <16 x i8>* %a
751 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
752 %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
753 <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
754 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
755 store <16 x i8> %2, <16 x i8>* %c
756 ; CHECK-DAG: st.b [[R3]], 0($4)
759 ; CHECK: .size splati_v16i8_0
; Single-source halfword shuffle that splats lane 4 of %a (every mask index
; is 4). The expected code is splati.h with index 4.
762 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
763 ; CHECK: splati_v8i16_0:
765 %1 = load <8 x i16>* %a
766 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
767 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
768 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
769 store <8 x i16> %2, <8 x i16>* %c
770 ; CHECK-DAG: st.h [[R3]], 0($4)
773 ; CHECK: .size splati_v8i16_0
; Single-source word shuffle that splats lane 3 of %a. The test expects shf.w
; with immediate 255 (0b11111111, lane 3 in every two-bit field) instead of a
; splati.w, as the inline note below explains.
776 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
777 ; CHECK: splati_v4i32_0:
779 %1 = load <4 x i32>* %a
780 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
781 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
782 ; shf.w and splati.w are equivalent
783 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
784 store <4 x i32> %2, <4 x i32>* %c
785 ; CHECK-DAG: st.w [[R3]], 0($4)
788 ; CHECK: .size splati_v4i32_0
; Single-source doubleword shuffle that splats lane 1 of %a. The expected code
; is splati.d with index 1.
791 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
792 ; CHECK: splati_v2i64_0:
794 %1 = load <2 x i64>* %a
795 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
796 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
797 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
798 store <2 x i64> %2, <2 x i64>* %c
799 ; CHECK-DAG: st.d [[R3]], 0($4)
802 ; CHECK: .size splati_v2i64_0