1 ; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
; <16 x i8> add of two loaded vectors. The checks expect ld.b loads through
; $5 (%a) and $6 (%b), a single addv.b, and an st.b of the result through $4 (%c).
define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: add_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = add <16 x i8> %1, %2
  ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v16i8
}
; <8 x i16> add of two loaded vectors. The checks expect ld.h loads through
; $5 (%a) and $6 (%b), a single addv.h, and an st.h of the result through $4 (%c).
define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: add_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = add <8 x i16> %1, %2
  ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v8i16
}
; <4 x i32> add of two loaded vectors. The checks expect ld.w loads through
; $5 (%a) and $6 (%b), a single addv.w, and an st.w of the result through $4 (%c).
define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: add_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = add <4 x i32> %1, %2
  ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v4i32
}
; <2 x i64> add of two loaded vectors. The checks expect ld.d loads through
; $5 (%a) and $6 (%b), a single addv.d, and an st.d of the result through $4 (%c).
define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: add_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = add <2 x i64> %1, %2
  ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v2i64
}
; <16 x i8> add of a loaded vector and a constant splat of 1. The checks expect
; the immediate form addvi.b with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: add_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v16i8_i
}
; <8 x i16> add of a loaded vector and a constant splat of 1. The checks expect
; the immediate form addvi.h with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: add_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v8i16_i
}
; <4 x i32> add of a loaded vector and a constant splat of 1. The checks expect
; the immediate form addvi.w with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: add_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v4i32_i
}
; <2 x i64> add of a loaded vector and a constant splat of 1. The checks expect
; the immediate form addvi.d with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: add_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = add <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size add_v2i64_i
}
; <16 x i8> subtract (%a - %b) of two loaded vectors. The checks expect ld.b loads
; through $5 (%a) and $6 (%b), a single subv.b, and an st.b through $4 (%c).
define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sub_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <16 x i8> %1, %2
  ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v16i8
}
; <8 x i16> subtract (%a - %b) of two loaded vectors. The checks expect ld.h loads
; through $5 (%a) and $6 (%b), a single subv.h, and an st.h through $4 (%c).
define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sub_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <8 x i16> %1, %2
  ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v8i16
}
; <4 x i32> subtract (%a - %b) of two loaded vectors. The checks expect ld.w loads
; through $5 (%a) and $6 (%b), a single subv.w, and an st.w through $4 (%c).
define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sub_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <4 x i32> %1, %2
  ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v4i32
}
; <2 x i64> subtract (%a - %b) of two loaded vectors. The checks expect ld.d loads
; through $5 (%a) and $6 (%b), a single subv.d, and an st.d through $4 (%c).
define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sub_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <2 x i64> %1, %2
  ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v2i64
}
; <16 x i8> subtract of a constant splat of 1 from a loaded vector. The checks expect
; the immediate form subvi.b with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sub_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v16i8_i
}
; <8 x i16> subtract of a constant splat of 1 from a loaded vector. The checks expect
; the immediate form subvi.h with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sub_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v8i16_i
}
; <4 x i32> subtract of a constant splat of 1 from a loaded vector. The checks expect
; the immediate form subvi.w with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sub_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v4i32_i
}
; <2 x i64> subtract of a constant splat of 1 from a loaded vector. The checks expect
; the immediate form subvi.d with operand 1, loading via $5 (%a) and storing via $4 (%c).
define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sub_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size sub_v2i64_i
}
; <16 x i8> multiply of two loaded vectors. The checks expect ld.b loads through
; $5 (%a) and $6 (%b), a single mulv.b, and an st.b of the result through $4 (%c).
define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: mul_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <16 x i8> %1, %2
  ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size mul_v16i8
}
; <8 x i16> multiply of two loaded vectors. The checks expect ld.h loads through
; $5 (%a) and $6 (%b), a single mulv.h, and an st.h of the result through $4 (%c).
define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: mul_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <8 x i16> %1, %2
  ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size mul_v8i16
}
; <4 x i32> multiply of two loaded vectors. The checks expect ld.w loads through
; $5 (%a) and $6 (%b), a single mulv.w, and an st.w of the result through $4 (%c).
define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: mul_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <4 x i32> %1, %2
  ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size mul_v4i32
}
; <2 x i64> multiply of two loaded vectors. The checks expect ld.d loads through
; $5 (%a) and $6 (%b), a single mulv.d, and an st.d of the result through $4 (%c).
define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: mul_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <2 x i64> %1, %2
  ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size mul_v2i64
}
; <16 x i8> signed divide (%a / %b) of two loaded vectors. The checks expect ld.b loads
; through $5 (%a) and $6 (%b), a single div_s.b, and an st.b through $4 (%c).
define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: div_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <16 x i8> %1, %2
  ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_s_v16i8
}
; <8 x i16> signed divide (%a / %b) of two loaded vectors. The checks expect ld.h loads
; through $5 (%a) and $6 (%b), a single div_s.h, and an st.h through $4 (%c).
define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: div_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <8 x i16> %1, %2
  ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_s_v8i16
}
; <4 x i32> signed divide (%a / %b) of two loaded vectors. The checks expect ld.w loads
; through $5 (%a) and $6 (%b), a single div_s.w, and an st.w through $4 (%c).
define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: div_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <4 x i32> %1, %2
  ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_s_v4i32
}
; <2 x i64> signed divide (%a / %b) of two loaded vectors. The checks expect ld.d loads
; through $5 (%a) and $6 (%b), a single div_s.d, and an st.d through $4 (%c).
define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: div_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <2 x i64> %1, %2
  ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_s_v2i64
}
; <16 x i8> unsigned divide (%a / %b) of two loaded vectors. The checks expect ld.b loads
; through $5 (%a) and $6 (%b), a single div_u.b, and an st.b through $4 (%c).
define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: div_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <16 x i8> %1, %2
  ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_u_v16i8
}
; <8 x i16> unsigned divide (%a / %b) of two loaded vectors. The checks expect ld.h loads
; through $5 (%a) and $6 (%b), a single div_u.h, and an st.h through $4 (%c).
define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: div_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <8 x i16> %1, %2
  ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_u_v8i16
}
; <4 x i32> unsigned divide (%a / %b) of two loaded vectors. The checks expect ld.w loads
; through $5 (%a) and $6 (%b), a single div_u.w, and an st.w through $4 (%c).
define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: div_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <4 x i32> %1, %2
  ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_u_v4i32
}
; <2 x i64> unsigned divide (%a / %b) of two loaded vectors. The checks expect ld.d loads
; through $5 (%a) and $6 (%b), a single div_u.d, and an st.d through $4 (%c).
define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: div_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <2 x i64> %1, %2
  ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ; The block needs a terminator for the IR to parse.
  ret void
  ; CHECK: .size div_u_v2i64
}