1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
; Element-wise `add` of two <16 x i8> vectors loaded from %a and %b; the sum is stored to %c.
; Expected selection per the directives below: two ld.b, one addv.b, one st.b
; (the patterns address %a via $5, %b via $6 and %c via $4).
4 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
7 %1 = load <16 x i8>* %a
8 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9 %2 = load <16 x i8>* %b
10 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
11 %3 = add <16 x i8> %1, %2
12 ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
13 store <16 x i8> %3, <16 x i8>* %c
14 ; CHECK-DAG: st.b [[R3]], 0($4)
17 ; CHECK: .size add_v16i8
; Element-wise `add` of two <8 x i16> vectors loaded from %a and %b; the sum is stored to %c.
; Expected selection per the directives below: two ld.h, one addv.h, one st.h.
20 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
23 %1 = load <8 x i16>* %a
24 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
25 %2 = load <8 x i16>* %b
26 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
27 %3 = add <8 x i16> %1, %2
28 ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
29 store <8 x i16> %3, <8 x i16>* %c
30 ; CHECK-DAG: st.h [[R3]], 0($4)
33 ; CHECK: .size add_v8i16
; Element-wise `add` of two <4 x i32> vectors loaded from %a and %b; the sum is stored to %c.
; Expected selection per the directives below: two ld.w, one addv.w, one st.w.
36 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
39 %1 = load <4 x i32>* %a
40 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
41 %2 = load <4 x i32>* %b
42 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
43 %3 = add <4 x i32> %1, %2
44 ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
45 store <4 x i32> %3, <4 x i32>* %c
46 ; CHECK-DAG: st.w [[R3]], 0($4)
49 ; CHECK: .size add_v4i32
; Element-wise `add` of two <2 x i64> vectors loaded from %a and %b; the sum is stored to %c.
; Expected selection per the directives below: two ld.d, one addv.d, one st.d.
52 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
55 %1 = load <2 x i64>* %a
56 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
57 %2 = load <2 x i64>* %b
58 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
59 %3 = add <2 x i64> %1, %2
60 ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
61 store <2 x i64> %3, <2 x i64>* %c
62 ; CHECK-DAG: st.d [[R3]], 0($4)
65 ; CHECK: .size add_v2i64
; `add` of a <16 x i8> vector loaded from %a with a constant vector whose every lane is 1;
; the result is stored to %c. Expected selection: the immediate form addvi.b with immediate 1.
68 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
71 %1 = load <16 x i8>* %a
72 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
73 %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
74 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
75 ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
76 store <16 x i8> %2, <16 x i8>* %c
77 ; CHECK-DAG: st.b [[R3]], 0($4)
80 ; CHECK: .size add_v16i8_i
; `add` of a <8 x i16> vector loaded from %a with a constant vector whose every lane is 1;
; the result is stored to %c. Expected selection: the immediate form addvi.h with immediate 1.
83 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
86 %1 = load <8 x i16>* %a
87 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
88 %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
89 i16 1, i16 1, i16 1, i16 1>
90 ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
91 store <8 x i16> %2, <8 x i16>* %c
92 ; CHECK-DAG: st.h [[R3]], 0($4)
95 ; CHECK: .size add_v8i16_i
; `add` of a <4 x i32> vector loaded from %a with a constant vector whose every lane is 1;
; the result is stored to %c. Expected selection: the immediate form addvi.w with immediate 1.
98 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
101 %1 = load <4 x i32>* %a
102 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
103 %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
104 ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
105 store <4 x i32> %2, <4 x i32>* %c
106 ; CHECK-DAG: st.w [[R3]], 0($4)
109 ; CHECK: .size add_v4i32_i
; `add` of a <2 x i64> vector loaded from %a with a constant vector whose every lane is 1;
; the result is stored to %c. Expected selection: the immediate form addvi.d with immediate 1.
112 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
113 ; CHECK: add_v2i64_i:
115 %1 = load <2 x i64>* %a
116 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
117 %2 = add <2 x i64> %1, <i64 1, i64 1>
118 ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
119 store <2 x i64> %2, <2 x i64>* %c
120 ; CHECK-DAG: st.d [[R3]], 0($4)
123 ; CHECK: .size add_v2i64_i
; Element-wise `sub` (%a vector minus %b vector) on <16 x i8>; the difference is stored to %c.
; Expected selection per the directives below: two ld.b, one subv.b, one st.b.
126 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
129 %1 = load <16 x i8>* %a
130 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
131 %2 = load <16 x i8>* %b
132 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
133 %3 = sub <16 x i8> %1, %2
134 ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
135 store <16 x i8> %3, <16 x i8>* %c
136 ; CHECK-DAG: st.b [[R3]], 0($4)
139 ; CHECK: .size sub_v16i8
; Element-wise `sub` (%a vector minus %b vector) on <8 x i16>; the difference is stored to %c.
; Expected selection per the directives below: two ld.h, one subv.h, one st.h.
142 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
145 %1 = load <8 x i16>* %a
146 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
147 %2 = load <8 x i16>* %b
148 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
149 %3 = sub <8 x i16> %1, %2
150 ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
151 store <8 x i16> %3, <8 x i16>* %c
152 ; CHECK-DAG: st.h [[R3]], 0($4)
155 ; CHECK: .size sub_v8i16
; Element-wise `sub` (%a vector minus %b vector) on <4 x i32>; the difference is stored to %c.
; Expected selection per the directives below: two ld.w, one subv.w, one st.w.
158 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
161 %1 = load <4 x i32>* %a
162 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
163 %2 = load <4 x i32>* %b
164 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
165 %3 = sub <4 x i32> %1, %2
166 ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
167 store <4 x i32> %3, <4 x i32>* %c
168 ; CHECK-DAG: st.w [[R3]], 0($4)
171 ; CHECK: .size sub_v4i32
; Element-wise `sub` (%a vector minus %b vector) on <2 x i64>; the difference is stored to %c.
; Expected selection per the directives below: two ld.d, one subv.d, one st.d.
174 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
177 %1 = load <2 x i64>* %a
178 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
179 %2 = load <2 x i64>* %b
180 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
181 %3 = sub <2 x i64> %1, %2
182 ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
183 store <2 x i64> %3, <2 x i64>* %c
184 ; CHECK-DAG: st.d [[R3]], 0($4)
187 ; CHECK: .size sub_v2i64
; `sub` of a constant vector whose every lane is 1 from a <16 x i8> vector loaded from %a;
; the result is stored to %c. Expected selection: the immediate form subvi.b with immediate 1.
190 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
191 ; CHECK: sub_v16i8_i:
193 %1 = load <16 x i8>* %a
194 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
195 %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
196 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
197 ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
198 store <16 x i8> %2, <16 x i8>* %c
199 ; CHECK-DAG: st.b [[R3]], 0($4)
202 ; CHECK: .size sub_v16i8_i
; `sub` of a constant vector whose every lane is 1 from a <8 x i16> vector loaded from %a;
; the result is stored to %c. Expected selection: the immediate form subvi.h with immediate 1.
205 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
206 ; CHECK: sub_v8i16_i:
208 %1 = load <8 x i16>* %a
209 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
210 %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
211 i16 1, i16 1, i16 1, i16 1>
212 ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
213 store <8 x i16> %2, <8 x i16>* %c
214 ; CHECK-DAG: st.h [[R3]], 0($4)
217 ; CHECK: .size sub_v8i16_i
; `sub` of a constant vector whose every lane is 1 from a <4 x i32> vector loaded from %a;
; the result is stored to %c. Expected selection: the immediate form subvi.w with immediate 1.
220 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
221 ; CHECK: sub_v4i32_i:
223 %1 = load <4 x i32>* %a
224 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
225 %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
226 ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
227 store <4 x i32> %2, <4 x i32>* %c
228 ; CHECK-DAG: st.w [[R3]], 0($4)
231 ; CHECK: .size sub_v4i32_i
; `sub` of a constant vector whose every lane is 1 from a <2 x i64> vector loaded from %a;
; the result is stored to %c. Expected selection: the immediate form subvi.d with immediate 1.
234 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
235 ; CHECK: sub_v2i64_i:
237 %1 = load <2 x i64>* %a
238 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
239 %2 = sub <2 x i64> %1, <i64 1, i64 1>
240 ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
241 store <2 x i64> %2, <2 x i64>* %c
242 ; CHECK-DAG: st.d [[R3]], 0($4)
245 ; CHECK: .size sub_v2i64_i
; Element-wise `mul` of two <16 x i8> vectors loaded from %a and %b; the product is stored to %c.
; Expected selection per the directives below: two ld.b, one mulv.b, one st.b.
248 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
251 %1 = load <16 x i8>* %a
252 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
253 %2 = load <16 x i8>* %b
254 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
255 %3 = mul <16 x i8> %1, %2
256 ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
257 store <16 x i8> %3, <16 x i8>* %c
258 ; CHECK-DAG: st.b [[R3]], 0($4)
261 ; CHECK: .size mul_v16i8
; Element-wise `mul` of two <8 x i16> vectors loaded from %a and %b; the product is stored to %c.
; Expected selection per the directives below: two ld.h, one mulv.h, one st.h.
264 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
267 %1 = load <8 x i16>* %a
268 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
269 %2 = load <8 x i16>* %b
270 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
271 %3 = mul <8 x i16> %1, %2
272 ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
273 store <8 x i16> %3, <8 x i16>* %c
274 ; CHECK-DAG: st.h [[R3]], 0($4)
277 ; CHECK: .size mul_v8i16
; Element-wise `mul` of two <4 x i32> vectors loaded from %a and %b; the product is stored to %c.
; Expected selection per the directives below: two ld.w, one mulv.w, one st.w.
280 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
283 %1 = load <4 x i32>* %a
284 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
285 %2 = load <4 x i32>* %b
286 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
287 %3 = mul <4 x i32> %1, %2
288 ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
289 store <4 x i32> %3, <4 x i32>* %c
290 ; CHECK-DAG: st.w [[R3]], 0($4)
293 ; CHECK: .size mul_v4i32
; Element-wise `mul` of two <2 x i64> vectors loaded from %a and %b; the product is stored to %c.
; Expected selection per the directives below: two ld.d, one mulv.d, one st.d.
296 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
299 %1 = load <2 x i64>* %a
300 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
301 %2 = load <2 x i64>* %b
302 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
303 %3 = mul <2 x i64> %1, %2
304 ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
305 store <2 x i64> %3, <2 x i64>* %c
306 ; CHECK-DAG: st.d [[R3]], 0($4)
309 ; CHECK: .size mul_v2i64
; Multiply-add on <16 x i8>: (%b vector * %c vector) + %a vector; the result is stored to %d.
; Expected selection: a single maddv.b whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.b.
312 define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
313 <16 x i8>* %c) nounwind {
314 ; CHECK: maddv_v16i8:
316 %1 = load <16 x i8>* %a
317 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
318 %2 = load <16 x i8>* %b
319 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
320 %3 = load <16 x i8>* %c
321 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
322 %4 = mul <16 x i8> %2, %3
323 %5 = add <16 x i8> %4, %1
324 ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
325 store <16 x i8> %5, <16 x i8>* %d
326 ; CHECK-DAG: st.b [[R1]], 0($4)
329 ; CHECK: .size maddv_v16i8
; Multiply-add on <8 x i16>: (%b vector * %c vector) + %a vector; the result is stored to %d.
; Expected selection: a single maddv.h whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.h.
332 define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
333 <8 x i16>* %c) nounwind {
334 ; CHECK: maddv_v8i16:
336 %1 = load <8 x i16>* %a
337 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
338 %2 = load <8 x i16>* %b
339 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
340 %3 = load <8 x i16>* %c
341 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
342 %4 = mul <8 x i16> %2, %3
343 %5 = add <8 x i16> %4, %1
344 ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
345 store <8 x i16> %5, <8 x i16>* %d
346 ; CHECK-DAG: st.h [[R1]], 0($4)
349 ; CHECK: .size maddv_v8i16
; Multiply-add on <4 x i32>: (%b vector * %c vector) + %a vector; the result is stored to %d.
; Expected selection: a single maddv.w whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.w.
352 define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
353 <4 x i32>* %c) nounwind {
354 ; CHECK: maddv_v4i32:
356 %1 = load <4 x i32>* %a
357 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
358 %2 = load <4 x i32>* %b
359 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
360 %3 = load <4 x i32>* %c
361 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
362 %4 = mul <4 x i32> %2, %3
363 %5 = add <4 x i32> %4, %1
364 ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
365 store <4 x i32> %5, <4 x i32>* %d
366 ; CHECK-DAG: st.w [[R1]], 0($4)
369 ; CHECK: .size maddv_v4i32
; Multiply-add on <2 x i64>: (%b vector * %c vector) + %a vector; the result is stored to %d.
; Expected selection: a single maddv.d whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.d.
372 define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
373 <2 x i64>* %c) nounwind {
374 ; CHECK: maddv_v2i64:
376 %1 = load <2 x i64>* %a
377 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
378 %2 = load <2 x i64>* %b
379 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
380 %3 = load <2 x i64>* %c
381 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
382 %4 = mul <2 x i64> %2, %3
383 %5 = add <2 x i64> %4, %1
384 ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
385 store <2 x i64> %5, <2 x i64>* %d
386 ; CHECK-DAG: st.d [[R1]], 0($4)
389 ; CHECK: .size maddv_v2i64
; Multiply-subtract on <16 x i8>: %a vector - (%b vector * %c vector); the result is stored to %d.
; Expected selection: a single msubv.b whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.b.
392 define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
393 <16 x i8>* %c) nounwind {
394 ; CHECK: msubv_v16i8:
396 %1 = load <16 x i8>* %a
397 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
398 %2 = load <16 x i8>* %b
399 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
400 %3 = load <16 x i8>* %c
401 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
402 %4 = mul <16 x i8> %2, %3
403 %5 = sub <16 x i8> %1, %4
404 ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
405 store <16 x i8> %5, <16 x i8>* %d
406 ; CHECK-DAG: st.b [[R1]], 0($4)
409 ; CHECK: .size msubv_v16i8
; Multiply-subtract on <8 x i16>: %a vector - (%b vector * %c vector); the result is stored to %d.
; Expected selection: a single msubv.h whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.h.
412 define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
413 <8 x i16>* %c) nounwind {
414 ; CHECK: msubv_v8i16:
416 %1 = load <8 x i16>* %a
417 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
418 %2 = load <8 x i16>* %b
419 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
420 %3 = load <8 x i16>* %c
421 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
422 %4 = mul <8 x i16> %2, %3
423 %5 = sub <8 x i16> %1, %4
424 ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
425 store <8 x i16> %5, <8 x i16>* %d
426 ; CHECK-DAG: st.h [[R1]], 0($4)
429 ; CHECK: .size msubv_v8i16
; Multiply-subtract on <4 x i32>: %a vector - (%b vector * %c vector); the result is stored to %d.
; Expected selection: a single msubv.w whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.w.
432 define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
433 <4 x i32>* %c) nounwind {
434 ; CHECK: msubv_v4i32:
436 %1 = load <4 x i32>* %a
437 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
438 %2 = load <4 x i32>* %b
439 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
440 %3 = load <4 x i32>* %c
441 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
442 %4 = mul <4 x i32> %2, %3
443 %5 = sub <4 x i32> %1, %4
444 ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
445 store <4 x i32> %5, <4 x i32>* %d
446 ; CHECK-DAG: st.w [[R1]], 0($4)
449 ; CHECK: .size msubv_v4i32
; Multiply-subtract on <2 x i64>: %a vector - (%b vector * %c vector); the result is stored to %d.
; Expected selection: a single msubv.d whose destination is the register loaded from %a
; (R1), and that same register is then stored by st.d.
452 define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
453 <2 x i64>* %c) nounwind {
454 ; CHECK: msubv_v2i64:
456 %1 = load <2 x i64>* %a
457 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
458 %2 = load <2 x i64>* %b
459 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
460 %3 = load <2 x i64>* %c
461 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
462 %4 = mul <2 x i64> %2, %3
463 %5 = sub <2 x i64> %1, %4
464 ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
465 store <2 x i64> %5, <2 x i64>* %d
466 ; CHECK-DAG: st.d [[R1]], 0($4)
469 ; CHECK: .size msubv_v2i64
; Signed division (`sdiv`, %a vector / %b vector) on <16 x i8>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.b, one div_s.b, one st.b.
472 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
473 ; CHECK: div_s_v16i8:
475 %1 = load <16 x i8>* %a
476 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
477 %2 = load <16 x i8>* %b
478 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
479 %3 = sdiv <16 x i8> %1, %2
480 ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
481 store <16 x i8> %3, <16 x i8>* %c
482 ; CHECK-DAG: st.b [[R3]], 0($4)
485 ; CHECK: .size div_s_v16i8
; Signed division (`sdiv`, %a vector / %b vector) on <8 x i16>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.h, one div_s.h, one st.h.
488 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
489 ; CHECK: div_s_v8i16:
491 %1 = load <8 x i16>* %a
492 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
493 %2 = load <8 x i16>* %b
494 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
495 %3 = sdiv <8 x i16> %1, %2
496 ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
497 store <8 x i16> %3, <8 x i16>* %c
498 ; CHECK-DAG: st.h [[R3]], 0($4)
501 ; CHECK: .size div_s_v8i16
; Signed division (`sdiv`, %a vector / %b vector) on <4 x i32>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.w, one div_s.w, one st.w.
504 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
505 ; CHECK: div_s_v4i32:
507 %1 = load <4 x i32>* %a
508 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
509 %2 = load <4 x i32>* %b
510 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
511 %3 = sdiv <4 x i32> %1, %2
512 ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
513 store <4 x i32> %3, <4 x i32>* %c
514 ; CHECK-DAG: st.w [[R3]], 0($4)
517 ; CHECK: .size div_s_v4i32
; Signed division (`sdiv`, %a vector / %b vector) on <2 x i64>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.d, one div_s.d, one st.d.
520 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
521 ; CHECK: div_s_v2i64:
523 %1 = load <2 x i64>* %a
524 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
525 %2 = load <2 x i64>* %b
526 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
527 %3 = sdiv <2 x i64> %1, %2
528 ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
529 store <2 x i64> %3, <2 x i64>* %c
530 ; CHECK-DAG: st.d [[R3]], 0($4)
533 ; CHECK: .size div_s_v2i64
; Unsigned division (`udiv`, %a vector / %b vector) on <16 x i8>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.b, one div_u.b, one st.b.
536 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
537 ; CHECK: div_u_v16i8:
539 %1 = load <16 x i8>* %a
540 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
541 %2 = load <16 x i8>* %b
542 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
543 %3 = udiv <16 x i8> %1, %2
544 ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
545 store <16 x i8> %3, <16 x i8>* %c
546 ; CHECK-DAG: st.b [[R3]], 0($4)
549 ; CHECK: .size div_u_v16i8
; Unsigned division (`udiv`, %a vector / %b vector) on <8 x i16>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.h, one div_u.h, one st.h.
552 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
553 ; CHECK: div_u_v8i16:
555 %1 = load <8 x i16>* %a
556 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
557 %2 = load <8 x i16>* %b
558 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
559 %3 = udiv <8 x i16> %1, %2
560 ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
561 store <8 x i16> %3, <8 x i16>* %c
562 ; CHECK-DAG: st.h [[R3]], 0($4)
565 ; CHECK: .size div_u_v8i16
; Unsigned division (`udiv`, %a vector / %b vector) on <4 x i32>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.w, one div_u.w, one st.w.
568 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
569 ; CHECK: div_u_v4i32:
571 %1 = load <4 x i32>* %a
572 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
573 %2 = load <4 x i32>* %b
574 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
575 %3 = udiv <4 x i32> %1, %2
576 ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
577 store <4 x i32> %3, <4 x i32>* %c
578 ; CHECK-DAG: st.w [[R3]], 0($4)
581 ; CHECK: .size div_u_v4i32
; Unsigned division (`udiv`, %a vector / %b vector) on <2 x i64>; the quotient is stored to %c.
; Expected selection per the directives below: two ld.d, one div_u.d, one st.d.
584 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
585 ; CHECK: div_u_v2i64:
587 %1 = load <2 x i64>* %a
588 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
589 %2 = load <2 x i64>* %b
590 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
591 %3 = udiv <2 x i64> %1, %2
592 ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
593 store <2 x i64> %3, <2 x i64>* %c
594 ; CHECK-DAG: st.d [[R3]], 0($4)
597 ; CHECK: .size div_u_v2i64
; Signed remainder (`srem`, %a vector by %b vector) on <16 x i8>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.b, one mod_s.b, one st.b.
600 define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
601 ; CHECK: mod_s_v16i8:
603 %1 = load <16 x i8>* %a
604 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
605 %2 = load <16 x i8>* %b
606 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
607 %3 = srem <16 x i8> %1, %2
608 ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
609 store <16 x i8> %3, <16 x i8>* %c
610 ; CHECK-DAG: st.b [[R3]], 0($4)
613 ; CHECK: .size mod_s_v16i8
; Signed remainder (`srem`, %a vector by %b vector) on <8 x i16>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.h, one mod_s.h, one st.h.
616 define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
617 ; CHECK: mod_s_v8i16:
619 %1 = load <8 x i16>* %a
620 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
621 %2 = load <8 x i16>* %b
622 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
623 %3 = srem <8 x i16> %1, %2
624 ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
625 store <8 x i16> %3, <8 x i16>* %c
626 ; CHECK-DAG: st.h [[R3]], 0($4)
629 ; CHECK: .size mod_s_v8i16
; Signed remainder (`srem`, %a vector by %b vector) on <4 x i32>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.w, one mod_s.w, one st.w.
632 define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
633 ; CHECK: mod_s_v4i32:
635 %1 = load <4 x i32>* %a
636 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
637 %2 = load <4 x i32>* %b
638 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
639 %3 = srem <4 x i32> %1, %2
640 ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
641 store <4 x i32> %3, <4 x i32>* %c
642 ; CHECK-DAG: st.w [[R3]], 0($4)
645 ; CHECK: .size mod_s_v4i32
; Signed remainder (`srem`, %a vector by %b vector) on <2 x i64>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.d, one mod_s.d, one st.d.
648 define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
649 ; CHECK: mod_s_v2i64:
651 %1 = load <2 x i64>* %a
652 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
653 %2 = load <2 x i64>* %b
654 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
655 %3 = srem <2 x i64> %1, %2
656 ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
657 store <2 x i64> %3, <2 x i64>* %c
658 ; CHECK-DAG: st.d [[R3]], 0($4)
661 ; CHECK: .size mod_s_v2i64
; Unsigned remainder (`urem`, %a vector by %b vector) on <16 x i8>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.b, one mod_u.b, one st.b.
664 define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
665 ; CHECK: mod_u_v16i8:
667 %1 = load <16 x i8>* %a
668 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
669 %2 = load <16 x i8>* %b
670 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
671 %3 = urem <16 x i8> %1, %2
672 ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
673 store <16 x i8> %3, <16 x i8>* %c
674 ; CHECK-DAG: st.b [[R3]], 0($4)
677 ; CHECK: .size mod_u_v16i8
; Unsigned remainder (`urem`, %a vector by %b vector) on <8 x i16>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.h, one mod_u.h, one st.h.
680 define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
681 ; CHECK: mod_u_v8i16:
683 %1 = load <8 x i16>* %a
684 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
685 %2 = load <8 x i16>* %b
686 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
687 %3 = urem <8 x i16> %1, %2
688 ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
689 store <8 x i16> %3, <8 x i16>* %c
690 ; CHECK-DAG: st.h [[R3]], 0($4)
693 ; CHECK: .size mod_u_v8i16
; Unsigned remainder (`urem`, %a vector by %b vector) on <4 x i32>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.w, one mod_u.w, one st.w.
696 define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
697 ; CHECK: mod_u_v4i32:
699 %1 = load <4 x i32>* %a
700 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
701 %2 = load <4 x i32>* %b
702 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
703 %3 = urem <4 x i32> %1, %2
704 ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
705 store <4 x i32> %3, <4 x i32>* %c
706 ; CHECK-DAG: st.w [[R3]], 0($4)
709 ; CHECK: .size mod_u_v4i32
; Unsigned remainder (`urem`, %a vector by %b vector) on <2 x i64>; the remainder is stored to %c.
; Expected selection per the directives below: two ld.d, one mod_u.d, one st.d.
712 define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
713 ; CHECK: mod_u_v2i64:
715 %1 = load <2 x i64>* %a
716 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
717 %2 = load <2 x i64>* %b
718 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
719 %3 = urem <2 x i64> %1, %2
720 ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
721 store <2 x i64> %3, <2 x i64>* %c
722 ; CHECK-DAG: st.d [[R3]], 0($4)
725 ; CHECK: .size mod_u_v2i64