1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
; Single-input shuffle that reverses the order of the 16 bytes loaded from %a
; and stores the result to %c. The checks expect a control vector loaded from
; a %lo-relocated address (presumably the constant-pool copy of the mask) and
; a vshf.b whose two source operands are both the vector loaded from %a.
; The function-label check below was missing for this function only; it is
; restored so the DAG checks are anchored to this function like all others.
4 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
5 ; CHECK: vshf_v16i8_0:
7 %1 = load <16 x i8>* %a
8 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
11 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
12 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
13 store <16 x i8> %2, <16 x i8>* %c
14 ; CHECK-DAG: st.b [[R3]], 0($4)
17 ; CHECK: .size vshf_v16i8_0
; Single-input shuffle whose mask is sixteen copies of index 1, i.e. element 1
; of the vector loaded from %a is broadcast to every lane. The checks expect a
; splati.b with element index 1 feeding the store to %c.
20 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
21 ; CHECK: vshf_v16i8_1:
23 %1 = load <16 x i8>* %a
24 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
25 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
26 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
27 store <16 x i8> %2, <16 x i8>* %c
28 ; CHECK-DAG: st.b [[R3]], 0($4)
31 ; CHECK: .size vshf_v16i8_1
; Two-operand shuffle whose mask indices are all >= 16, so every result lane
; comes from the second operand (%b); the load of %a has no check attached.
; The checks expect a vshf.b whose two source operands are both the vector
; loaded from %b, with the control vector loaded from a %lo-relocated address.
34 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
35 ; CHECK: vshf_v16i8_2:
37 %1 = load <16 x i8>* %a
38 %2 = load <16 x i8>* %b
39 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
40 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
41 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
42 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
43 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
44 store <16 x i8> %3, <16 x i8>* %c
45 ; CHECK-DAG: st.b [[R3]], 0($4)
48 ; CHECK: .size vshf_v16i8_2
; Two-operand shuffle whose mask mixes indices below 16 (from %a) and indices
; of 16 and above (from %b). The checks expect both vectors to be loaded and
; combined by a single vshf.b that takes the %a vector and the %b vector as
; its sources, with the control vector loaded from a %lo-relocated address.
51 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
52 ; CHECK: vshf_v16i8_3:
54 %1 = load <16 x i8>* %a
55 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
56 %2 = load <16 x i8>* %b
57 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
58 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
59 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
60 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
61 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
62 store <16 x i8> %3, <16 x i8>* %c
63 ; CHECK-DAG: st.b [[R3]], 0($4)
66 ; CHECK: .size vshf_v16i8_3
; Shuffle in which both operands are the same vector (%1) and the mask
; alternates between index 1 and index 17; both indices name element 1 of that
; vector, so the result is a broadcast. The checks expect splati.b with
; element index 1.
69 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
70 ; CHECK: vshf_v16i8_4:
72 %1 = load <16 x i8>* %a
73 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
74 %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
75 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
76 store <16 x i8> %2, <16 x i8>* %c
77 ; CHECK-DAG: st.b [[R3]], 0($4)
80 ; CHECK: .size vshf_v16i8_4
; Single-input shuffle that reverses the order of the 8 halfwords loaded from
; %a. The checks expect a control vector loaded from a %lo-relocated address
; and a vshf.h whose two source operands are both the vector loaded from %a.
83 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
84 ; CHECK: vshf_v8i16_0:
86 %1 = load <8 x i16>* %a
87 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
88 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
89 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
90 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
91 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
92 store <8 x i16> %2, <8 x i16>* %c
93 ; CHECK-DAG: st.h [[R3]], 0($4)
96 ; CHECK: .size vshf_v8i16_0
; Single-input shuffle whose mask is eight copies of index 1, broadcasting
; element 1 of the vector loaded from %a. The checks expect splati.h with
; element index 1.
99 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
100 ; CHECK: vshf_v8i16_1:
102 %1 = load <8 x i16>* %a
103 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
104 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
105 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
106 store <8 x i16> %2, <8 x i16>* %c
107 ; CHECK-DAG: st.h [[R3]], 0($4)
110 ; CHECK: .size vshf_v8i16_1
; Two-operand shuffle whose mask indices are all >= 8, so every result lane
; comes from the second operand (%b); the load of %a has no check attached.
; The checks expect a vshf.h whose two source operands are both the vector
; loaded from %b, with the control vector loaded from a %lo-relocated address.
113 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
114 ; CHECK: vshf_v8i16_2:
116 %1 = load <8 x i16>* %a
117 %2 = load <8 x i16>* %b
118 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
119 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
120 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
121 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
122 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
123 store <8 x i16> %3, <8 x i16>* %c
124 ; CHECK-DAG: st.h [[R3]], 0($4)
127 ; CHECK: .size vshf_v8i16_2
; Two-operand shuffle whose mask mixes indices below 8 (from %a) and indices
; of 8 and above (from %b). The checks expect a single vshf.h that takes the
; %a vector and the %b vector as its sources, with the control vector loaded
; from a %lo-relocated address.
130 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
131 ; CHECK: vshf_v8i16_3:
133 %1 = load <8 x i16>* %a
134 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
135 %2 = load <8 x i16>* %b
136 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
137 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
138 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
139 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
140 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
141 store <8 x i16> %3, <8 x i16>* %c
142 ; CHECK-DAG: st.h [[R3]], 0($4)
145 ; CHECK: .size vshf_v8i16_3
; Shuffle in which both operands are the same vector (%1) and the mask
; alternates between index 1 and index 9; both indices name element 1 of that
; vector, so the result is a broadcast. The checks expect splati.h with
; element index 1.
148 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
149 ; CHECK: vshf_v8i16_4:
151 %1 = load <8 x i16>* %a
152 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
153 %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
154 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
155 store <8 x i16> %2, <8 x i16>* %c
156 ; CHECK-DAG: st.h [[R3]], 0($4)
159 ; CHECK: .size vshf_v8i16_4
162 ; Note: a v4i32 vector consists of a single 4-element set, so a shuffle that
163 ; reads only one vector is matched by shf.w; the vshf.w instruction is never
; selected in the single-vector cases below.
; Single-input shuffle that reverses the 4 words loaded from %a. The checks
; expect shf.w with immediate 27 (0b00011011; read as 2-bit fields from the
; low end this is 3,2,1,0, matching the mask).
165 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
166 ; CHECK: vshf_v4i32_0:
168 %1 = load <4 x i32>* %a
169 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
170 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
171 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
172 store <4 x i32> %2, <4 x i32>* %c
173 ; CHECK-DAG: st.w [[R3]], 0($4)
176 ; CHECK: .size vshf_v4i32_0
; Single-input shuffle that broadcasts element 1 of the vector loaded from %a.
; The checks expect shf.w with immediate 85 (0b01010101; every 2-bit field is
; 1, matching the mask).
179 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
180 ; CHECK: vshf_v4i32_1:
182 %1 = load <4 x i32>* %a
183 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
184 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
185 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
186 store <4 x i32> %2, <4 x i32>* %c
187 ; CHECK-DAG: st.w [[R3]], 0($4)
190 ; CHECK: .size vshf_v4i32_1
; Two-operand shuffle whose mask indices are all >= 4, so every result lane
; comes from the second operand (%b); the load of %a has no check attached.
; The checks expect shf.w applied to the %b vector with immediate 36
; (0b00100100; 2-bit fields from the low end are 0,1,2,0, i.e. the mask
; indices 4,5,6,4 relative to %b).
193 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
194 ; CHECK: vshf_v4i32_2:
196 %1 = load <4 x i32>* %a
197 %2 = load <4 x i32>* %b
198 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
199 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
200 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
201 store <4 x i32> %3, <4 x i32>* %c
202 ; CHECK-DAG: st.w [[R3]], 0($4)
205 ; CHECK: .size vshf_v4i32_2
; Two-operand shuffle taking element 1 from %a and elements 5, 6, 4 (i.e. from
; %b). This is the v4i32 case where the checks do expect vshf.w: it takes the
; %a vector and the %b vector as its sources, with the control vector loaded
; from a %lo-relocated address.
208 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
209 ; CHECK: vshf_v4i32_3:
211 %1 = load <4 x i32>* %a
212 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
213 %2 = load <4 x i32>* %b
214 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
215 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
216 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
217 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
218 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
219 store <4 x i32> %3, <4 x i32>* %c
220 ; CHECK-DAG: st.w [[R3]], 0($4)
223 ; CHECK: .size vshf_v4i32_3
; Shuffle in which both operands are the same vector (%1) and the mask uses
; only indices 1 and 5; both name element 1 of that vector, so the result is a
; broadcast. The checks expect shf.w with immediate 85 (every 2-bit field is 1).
226 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
227 ; CHECK: vshf_v4i32_4:
229 %1 = load <4 x i32>* %a
230 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
231 %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
232 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
233 store <4 x i32> %2, <4 x i32>* %c
234 ; CHECK-DAG: st.w [[R3]], 0($4)
237 ; CHECK: .size vshf_v4i32_4
; Single-input shuffle that swaps the two doublewords loaded from %a. The
; checks expect a control vector loaded from a %lo-relocated address and a
; vshf.d whose two source operands are both the vector loaded from %a.
240 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
241 ; CHECK: vshf_v2i64_0:
243 %1 = load <2 x i64>* %a
244 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
245 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
246 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
247 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
248 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
249 store <2 x i64> %2, <2 x i64>* %c
250 ; CHECK-DAG: st.d [[R3]], 0($4)
253 ; CHECK: .size vshf_v2i64_0
; Single-input shuffle that broadcasts element 1 of the vector loaded from %a.
; The checks expect splati.d with element index 1.
256 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
257 ; CHECK: vshf_v2i64_1:
259 %1 = load <2 x i64>* %a
260 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
261 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
262 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
263 store <2 x i64> %2, <2 x i64>* %c
264 ; CHECK-DAG: st.d [[R3]], 0($4)
267 ; CHECK: .size vshf_v2i64_1
; Two-operand shuffle with mask <3, 2>: both result lanes come from the second
; operand (%b), in swapped order; the load of %a has no check attached. The
; checks expect a vshf.d whose two source operands are both the vector loaded
; from %b, with the control vector loaded from a %lo-relocated address.
270 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
271 ; CHECK: vshf_v2i64_2:
273 %1 = load <2 x i64>* %a
274 %2 = load <2 x i64>* %b
275 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
276 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
277 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
278 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
279 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
280 store <2 x i64> %3, <2 x i64>* %c
281 ; CHECK-DAG: st.d [[R3]], 0($4)
284 ; CHECK: .size vshf_v2i64_2
; Two-operand shuffle with mask <1, 2>: element 1 of %a followed by element 0
; of %b. The checks expect a vshf.d that takes the %a vector and the %b vector
; as its sources, with the control vector loaded from a %lo-relocated address.
287 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
288 ; CHECK: vshf_v2i64_3:
290 %1 = load <2 x i64>* %a
291 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
292 %2 = load <2 x i64>* %b
293 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
294 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
295 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
296 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
297 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
298 store <2 x i64> %3, <2 x i64>* %c
299 ; CHECK-DAG: st.d [[R3]], 0($4)
302 ; CHECK: .size vshf_v2i64_3
; Shuffle in which both operands are the same vector (%1) and the mask is
; <1, 3>; both indices name element 1 of that vector, so the result is a
; broadcast. The checks expect splati.d with element index 1.
305 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
306 ; CHECK: vshf_v2i64_4:
308 %1 = load <2 x i64>* %a
309 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
310 %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
311 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
312 store <2 x i64> %2, <2 x i64>* %c
313 ; CHECK-DAG: st.d [[R3]], 0($4)
316 ; CHECK: .size vshf_v2i64_4
; Single-input byte shuffle that applies the same permutation (1,3,2,0) inside
; each consecutive group of four elements. The checks expect shf.b with
; immediate 45 (0b00101101; 2-bit fields from the low end are 1,3,2,0).
319 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
320 ; CHECK: shf_v16i8_0:
322 %1 = load <16 x i8>* %a
323 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
324 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
325 ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
326 store <16 x i8> %2, <16 x i8>* %c
327 ; CHECK-DAG: st.b [[R3]], 0($4)
330 ; CHECK: .size shf_v16i8_0
; Single-input halfword shuffle that reverses the elements inside each group
; of four (3,2,1,0 then 7,6,5,4). The checks expect shf.h with immediate 27
; (0b00011011; 2-bit fields from the low end are 3,2,1,0).
333 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
334 ; CHECK: shf_v8i16_0:
336 %1 = load <8 x i16>* %a
337 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
338 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
339 ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
340 store <8 x i16> %2, <8 x i16>* %c
341 ; CHECK-DAG: st.h [[R3]], 0($4)
344 ; CHECK: .size shf_v8i16_0
; Single-input word shuffle that reverses the four elements loaded from %a.
; The checks expect shf.w with immediate 27 (0b00011011; 2-bit fields from the
; low end are 3,2,1,0).
347 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
348 ; CHECK: shf_v4i32_0:
350 %1 = load <4 x i32>* %a
351 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
352 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
353 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
354 store <4 x i32> %2, <4 x i32>* %c
355 ; CHECK-DAG: st.w [[R3]], 0($4)
358 ; CHECK: .size shf_v4i32_0
361 ; shf.d does not exist
; Two-operand byte shuffle that interleaves the even-indexed elements of %a
; (0, 2, 4, ...) with the even-indexed elements of %b (16, 18, 20, ...).
; The checks expect a single ilvev.b taking the %a and %b vectors.
363 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
364 ; CHECK: ilvev_v16i8_0:
366 %1 = load <16 x i8>* %a
367 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
368 %2 = load <16 x i8>* %b
369 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
370 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
371 <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
372 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
373 store <16 x i8> %3, <16 x i8>* %c
374 ; CHECK-DAG: st.b [[R3]], 0($4)
377 ; CHECK: .size ilvev_v16i8_0
; Two-operand halfword shuffle that interleaves the even-indexed elements of
; %a (0, 2, 4, 6) with the even-indexed elements of %b (8, 10, 12, 14).
; The checks expect a single ilvev.h taking the %a and %b vectors.
380 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
381 ; CHECK: ilvev_v8i16_0:
383 %1 = load <8 x i16>* %a
384 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
385 %2 = load <8 x i16>* %b
386 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
387 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
388 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
389 store <8 x i16> %3, <8 x i16>* %c
390 ; CHECK-DAG: st.h [[R3]], 0($4)
393 ; CHECK: .size ilvev_v8i16_0
; Two-operand word shuffle that interleaves the even-indexed elements of %a
; (0, 2) with the even-indexed elements of %b (4, 6). The checks expect a
; single ilvev.w taking the %a and %b vectors.
396 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
397 ; CHECK: ilvev_v4i32_0:
399 %1 = load <4 x i32>* %a
400 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
401 %2 = load <4 x i32>* %b
402 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
403 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
404 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
405 store <4 x i32> %3, <4 x i32>* %c
406 ; CHECK-DAG: st.w [[R3]], 0($4)
409 ; CHECK: .size ilvev_v4i32_0
; Two-operand doubleword shuffle with mask <0, 2>: element 0 of %a followed by
; element 0 of %b. The checks expect a single ilvev.d taking the %a and %b
; vectors.
412 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
413 ; CHECK: ilvev_v2i64_0:
415 %1 = load <2 x i64>* %a
416 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
417 %2 = load <2 x i64>* %b
418 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
419 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
420 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
421 store <2 x i64> %3, <2 x i64>* %c
422 ; CHECK-DAG: st.d [[R3]], 0($4)
425 ; CHECK: .size ilvev_v2i64_0
; Two-operand byte shuffle that interleaves the odd-indexed elements of %a
; (1, 3, 5, ...) with the odd-indexed elements of %b (17, 19, 21, ...).
; The checks expect a single ilvod.b taking the %a and %b vectors.
428 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
429 ; CHECK: ilvod_v16i8_0:
431 %1 = load <16 x i8>* %a
432 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
433 %2 = load <16 x i8>* %b
434 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
435 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
436 <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
437 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
438 store <16 x i8> %3, <16 x i8>* %c
439 ; CHECK-DAG: st.b [[R3]], 0($4)
442 ; CHECK: .size ilvod_v16i8_0
; Two-operand halfword shuffle that interleaves the odd-indexed elements of %a
; (1, 3, 5, 7) with the odd-indexed elements of %b (9, 11, 13, 15).
; The checks expect a single ilvod.h taking the %a and %b vectors.
445 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
446 ; CHECK: ilvod_v8i16_0:
448 %1 = load <8 x i16>* %a
449 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
450 %2 = load <8 x i16>* %b
451 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
452 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
453 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
454 store <8 x i16> %3, <8 x i16>* %c
455 ; CHECK-DAG: st.h [[R3]], 0($4)
458 ; CHECK: .size ilvod_v8i16_0
; Two-operand word shuffle that interleaves the odd-indexed elements of %a
; (1, 3) with the odd-indexed elements of %b (5, 7). The checks expect a
; single ilvod.w taking the %a and %b vectors.
461 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
462 ; CHECK: ilvod_v4i32_0:
464 %1 = load <4 x i32>* %a
465 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
466 %2 = load <4 x i32>* %b
467 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
468 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
469 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
470 store <4 x i32> %3, <4 x i32>* %c
471 ; CHECK-DAG: st.w [[R3]], 0($4)
474 ; CHECK: .size ilvod_v4i32_0
; Two-operand doubleword shuffle with mask <1, 3>: element 1 of %a followed by
; element 1 of %b. The checks expect a single ilvod.d taking the %a and %b
; vectors.
477 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
478 ; CHECK: ilvod_v2i64_0:
480 %1 = load <2 x i64>* %a
481 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
482 %2 = load <2 x i64>* %b
483 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
484 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
485 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
486 store <2 x i64> %3, <2 x i64>* %c
487 ; CHECK-DAG: st.d [[R3]], 0($4)
490 ; CHECK: .size ilvod_v2i64_0
; Two-operand byte shuffle that interleaves elements 0..7 of %a with elements
; 0..7 of %b (mask indices 16..23). The checks expect a single ilvl.b taking
; the %a and %b vectors.
493 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
494 ; CHECK: ilvl_v16i8_0:
496 %1 = load <16 x i8>* %a
497 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
498 %2 = load <16 x i8>* %b
499 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
500 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
501 <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
502 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
503 store <16 x i8> %3, <16 x i8>* %c
504 ; CHECK-DAG: st.b [[R3]], 0($4)
507 ; CHECK: .size ilvl_v16i8_0
; Two-operand halfword shuffle that interleaves elements 0..3 of %a with
; elements 0..3 of %b (mask indices 8..11). The checks expect a single ilvl.h
; taking the %a and %b vectors.
510 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
511 ; CHECK: ilvl_v8i16_0:
513 %1 = load <8 x i16>* %a
514 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
515 %2 = load <8 x i16>* %b
516 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
517 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
518 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
519 store <8 x i16> %3, <8 x i16>* %c
520 ; CHECK-DAG: st.h [[R3]], 0($4)
523 ; CHECK: .size ilvl_v8i16_0
; Two-operand word shuffle that interleaves elements 0..1 of %a with elements
; 0..1 of %b (mask indices 4..5). The checks expect a single ilvl.w taking the
; %a and %b vectors.
526 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
527 ; CHECK: ilvl_v4i32_0:
529 %1 = load <4 x i32>* %a
530 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
531 %2 = load <4 x i32>* %b
532 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
533 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
534 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
535 store <4 x i32> %3, <4 x i32>* %c
536 ; CHECK-DAG: st.w [[R3]], 0($4)
539 ; CHECK: .size ilvl_v4i32_0
; Two-operand doubleword shuffle with mask <0, 2>, the same mask used by
; ilvev_v2i64_0. As the in-body note says, the ilvl.d and ilvev.d patterns
; coincide for v2i64, so the checks expect ilvev.d here.
542 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
543 ; CHECK: ilvl_v2i64_0:
545 %1 = load <2 x i64>* %a
546 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
547 %2 = load <2 x i64>* %b
548 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
549 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
550 ; ilvl.d and ilvev.d are equivalent for v2i64
551 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
552 store <2 x i64> %3, <2 x i64>* %c
553 ; CHECK-DAG: st.d [[R3]], 0($4)
556 ; CHECK: .size ilvl_v2i64_0
; Two-operand byte shuffle that interleaves elements 8..15 of %a with elements
; 8..15 of %b (mask indices 24..31). The checks expect a single ilvr.b taking
; the %a and %b vectors.
559 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
560 ; CHECK: ilvr_v16i8_0:
562 %1 = load <16 x i8>* %a
563 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
564 %2 = load <16 x i8>* %b
565 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
566 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
567 <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
568 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
569 store <16 x i8> %3, <16 x i8>* %c
570 ; CHECK-DAG: st.b [[R3]], 0($4)
573 ; CHECK: .size ilvr_v16i8_0
; Two-operand halfword shuffle that interleaves elements 4..7 of %a with
; elements 4..7 of %b (mask indices 12..15). The checks expect a single ilvr.h
; taking the %a and %b vectors.
576 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
577 ; CHECK: ilvr_v8i16_0:
579 %1 = load <8 x i16>* %a
580 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
581 %2 = load <8 x i16>* %b
582 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
583 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
584 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
585 store <8 x i16> %3, <8 x i16>* %c
586 ; CHECK-DAG: st.h [[R3]], 0($4)
589 ; CHECK: .size ilvr_v8i16_0
; Two-operand word shuffle that interleaves elements 2..3 of %a with elements
; 2..3 of %b (mask indices 6..7). The checks expect a single ilvr.w taking the
; %a and %b vectors.
592 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
593 ; CHECK: ilvr_v4i32_0:
595 %1 = load <4 x i32>* %a
596 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
597 %2 = load <4 x i32>* %b
598 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
599 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
600 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
601 store <4 x i32> %3, <4 x i32>* %c
602 ; CHECK-DAG: st.w [[R3]], 0($4)
605 ; CHECK: .size ilvr_v4i32_0
; Two-operand doubleword shuffle with mask <1, 3>, the same mask used by
; ilvod_v2i64_0. As the in-body note says, the ilvr.d and ilvod.d patterns
; coincide for v2i64, so the checks expect ilvod.d here.
608 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
609 ; CHECK: ilvr_v2i64_0:
611 %1 = load <2 x i64>* %a
612 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
613 %2 = load <2 x i64>* %b
614 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
615 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
616 ; ilvr.d and ilvod.d are equivalent for v2i64
617 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
618 store <2 x i64> %3, <2 x i64>* %c
619 ; CHECK-DAG: st.d [[R3]], 0($4)
622 ; CHECK: .size ilvr_v2i64_0
; Two-operand byte shuffle that concatenates the even-indexed elements of %a
; (0, 2, ..., 14) with the even-indexed elements of %b (16, 18, ..., 30).
; The checks expect a single pckev.b taking the %a and %b vectors.
625 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
626 ; CHECK: pckev_v16i8_0:
628 %1 = load <16 x i8>* %a
629 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
630 %2 = load <16 x i8>* %b
631 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
632 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
633 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
634 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
635 store <16 x i8> %3, <16 x i8>* %c
636 ; CHECK-DAG: st.b [[R3]], 0($4)
639 ; CHECK: .size pckev_v16i8_0
; Two-operand halfword shuffle that concatenates the even-indexed elements of
; %a (0, 2, 4, 6) with the even-indexed elements of %b (8, 10, 12, 14).
; The checks expect a single pckev.h taking the %a and %b vectors.
642 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
643 ; CHECK: pckev_v8i16_0:
645 %1 = load <8 x i16>* %a
646 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
647 %2 = load <8 x i16>* %b
648 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
649 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
650 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
651 store <8 x i16> %3, <8 x i16>* %c
652 ; CHECK-DAG: st.h [[R3]], 0($4)
655 ; CHECK: .size pckev_v8i16_0
; Two-operand word shuffle that concatenates the even-indexed elements of %a
; (0, 2) with the even-indexed elements of %b (4, 6). The checks expect a
; single pckev.w taking the %a and %b vectors.
658 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
659 ; CHECK: pckev_v4i32_0:
661 %1 = load <4 x i32>* %a
662 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
663 %2 = load <4 x i32>* %b
664 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
665 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
666 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
667 store <4 x i32> %3, <4 x i32>* %c
668 ; CHECK-DAG: st.w [[R3]], 0($4)
671 ; CHECK: .size pckev_v4i32_0
; Two-operand doubleword shuffle with mask <0, 2>, the same mask used by
; ilvev_v2i64_0. As the in-body note says, the pckev.d and ilvev.d patterns
; coincide for v2i64, so the checks expect ilvev.d here.
674 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
675 ; CHECK: pckev_v2i64_0:
677 %1 = load <2 x i64>* %a
678 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
679 %2 = load <2 x i64>* %b
680 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
681 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
682 ; pckev.d and ilvev.d are equivalent for v2i64
683 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
684 store <2 x i64> %3, <2 x i64>* %c
685 ; CHECK-DAG: st.d [[R3]], 0($4)
688 ; CHECK: .size pckev_v2i64_0
; Two-operand byte shuffle that concatenates the odd-indexed elements of %a
; (1, 3, ..., 15) with the odd-indexed elements of %b (17, 19, ..., 31).
; The checks expect a single pckod.b taking the %a and %b vectors.
691 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
692 ; CHECK: pckod_v16i8_0:
694 %1 = load <16 x i8>* %a
695 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
696 %2 = load <16 x i8>* %b
697 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
698 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
699 <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
700 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
701 store <16 x i8> %3, <16 x i8>* %c
702 ; CHECK-DAG: st.b [[R3]], 0($4)
705 ; CHECK: .size pckod_v16i8_0
; Two-operand halfword shuffle that concatenates the odd-indexed elements of
; %a (1, 3, 5, 7) with the odd-indexed elements of %b (9, 11, 13, 15).
; The checks expect a single pckod.h taking the %a and %b vectors.
708 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
709 ; CHECK: pckod_v8i16_0:
711 %1 = load <8 x i16>* %a
712 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
713 %2 = load <8 x i16>* %b
714 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
715 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
716 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
717 store <8 x i16> %3, <8 x i16>* %c
718 ; CHECK-DAG: st.h [[R3]], 0($4)
721 ; CHECK: .size pckod_v8i16_0
; Two-operand word shuffle that concatenates the odd-indexed elements of %a
; (1, 3) with the odd-indexed elements of %b (5, 7). The checks expect a
; single pckod.w taking the %a and %b vectors.
724 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
725 ; CHECK: pckod_v4i32_0:
727 %1 = load <4 x i32>* %a
728 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
729 %2 = load <4 x i32>* %b
730 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
731 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
732 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
733 store <4 x i32> %3, <4 x i32>* %c
734 ; CHECK-DAG: st.w [[R3]], 0($4)
737 ; CHECK: .size pckod_v4i32_0
; Two-operand doubleword shuffle with mask <1, 3>, the same mask used by
; ilvod_v2i64_0. As the in-body note says, the pckod.d and ilvod.d patterns
; coincide for v2i64, so the checks expect ilvod.d here.
740 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
741 ; CHECK: pckod_v2i64_0:
743 %1 = load <2 x i64>* %a
744 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
745 %2 = load <2 x i64>* %b
746 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
747 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
748 ; pckod.d and ilvod.d are equivalent for v2i64
749 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
750 store <2 x i64> %3, <2 x i64>* %c
751 ; CHECK-DAG: st.d [[R3]], 0($4)
754 ; CHECK: .size pckod_v2i64_0
; Single-input byte shuffle whose mask is sixteen copies of index 4, i.e.
; element 4 of the vector loaded from %a is broadcast to every lane. The
; checks expect splati.b with element index 4. Unlike the tests above, this
; function takes only %c and %a.
757 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
758 ; CHECK: splati_v16i8_0:
760 %1 = load <16 x i8>* %a
761 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
762 %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
763 <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
764 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
765 store <16 x i8> %2, <16 x i8>* %c
766 ; CHECK-DAG: st.b [[R3]], 0($4)
769 ; CHECK: .size splati_v16i8_0
; Single-input halfword shuffle whose mask is eight copies of index 4,
; broadcasting element 4 of the vector loaded from %a. The checks expect
; splati.h with element index 4.
772 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
773 ; CHECK: splati_v8i16_0:
775 %1 = load <8 x i16>* %a
776 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
777 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
778 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
779 store <8 x i16> %2, <8 x i16>* %c
780 ; CHECK-DAG: st.h [[R3]], 0($4)
783 ; CHECK: .size splati_v8i16_0
; Single-input word shuffle that broadcasts element 3 of the vector loaded
; from %a. Per the in-body note, shf.w and splati.w are equivalent here, and
; the checks expect shf.w with immediate 255 (every 2-bit field is 3).
786 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
787 ; CHECK: splati_v4i32_0:
789 %1 = load <4 x i32>* %a
790 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
791 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
792 ; shf.w and splati.w are equivalent
793 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
794 store <4 x i32> %2, <4 x i32>* %c
795 ; CHECK-DAG: st.w [[R3]], 0($4)
798 ; CHECK: .size splati_v4i32_0
; Single-input doubleword shuffle that broadcasts element 1 of the vector
; loaded from %a. The checks expect splati.d with element index 1.
801 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
802 ; CHECK: splati_v2i64_0:
804 %1 = load <2 x i64>* %a
805 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
806 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
807 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
808 store <2 x i64> %2, <2 x i64>* %c
809 ; CHECK-DAG: st.d [[R3]], 0($4)
812 ; CHECK: .size splati_v2i64_0