1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; and_v16i8: loads <16 x i8> vectors from %a and %b, ANDs them, stores to %c.
; Expected MSA code: two ld.b loads, and.v on the loaded registers, st.b.
3 define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = load <16 x i8>* %b
9 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
10 %3 = and <16 x i8> %1, %2
11 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
12 store <16 x i8> %3, <16 x i8>* %c
13 ; CHECK-DAG: st.b [[R3]], 0($4)
16 ; CHECK: .size and_v16i8
; and_v8i16: loads <8 x i16> vectors from %a and %b, ANDs them, stores to %c.
; Expected MSA code: two ld.h loads, and.v on the loaded registers, st.h.
19 define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
22 %1 = load <8 x i16>* %a
23 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
24 %2 = load <8 x i16>* %b
25 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
26 %3 = and <8 x i16> %1, %2
27 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
28 store <8 x i16> %3, <8 x i16>* %c
29 ; CHECK-DAG: st.h [[R3]], 0($4)
32 ; CHECK: .size and_v8i16
; and_v4i32: loads <4 x i32> vectors from %a and %b, ANDs them, stores to %c.
; Expected MSA code: two ld.w loads, and.v on the loaded registers, st.w.
35 define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
38 %1 = load <4 x i32>* %a
39 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
40 %2 = load <4 x i32>* %b
41 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
42 %3 = and <4 x i32> %1, %2
43 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
44 store <4 x i32> %3, <4 x i32>* %c
45 ; CHECK-DAG: st.w [[R3]], 0($4)
48 ; CHECK: .size and_v4i32
; and_v2i64: loads <2 x i64> vectors from %a and %b, ANDs them, stores to %c.
; Expected MSA code: two ld.d loads, and.v on the loaded registers, st.d.
51 define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
54 %1 = load <2 x i64>* %a
55 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
56 %2 = load <2 x i64>* %b
57 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
58 %3 = and <2 x i64> %1, %2
59 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
60 store <2 x i64> %3, <2 x i64>* %c
61 ; CHECK-DAG: st.d [[R3]], 0($4)
64 ; CHECK: .size and_v2i64
; and_v16i8_i: ANDs the <16 x i8> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.b, andi.b with immediate 1, st.b.
67 define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
70 %1 = load <16 x i8>* %a
71 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72 %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
73 ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
74 store <16 x i8> %2, <16 x i8>* %c
75 ; CHECK-DAG: st.b [[R4]], 0($4)
78 ; CHECK: .size and_v16i8_i
; and_v8i16_i: ANDs the <8 x i16> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.h, ldi.h 1 for the splat, and.v, st.h.
81 define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
84 %1 = load <8 x i16>* %a
85 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
86 %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
87 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
88 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
89 store <8 x i16> %2, <8 x i16>* %c
90 ; CHECK-DAG: st.h [[R4]], 0($4)
93 ; CHECK: .size and_v8i16_i
; and_v4i32_i: ANDs the <4 x i32> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.w, ldi.w 1 for the splat, and.v, st.w.
96 define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
99 %1 = load <4 x i32>* %a
100 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
101 %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
102 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
103 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
104 store <4 x i32> %2, <4 x i32>* %c
105 ; CHECK-DAG: st.w [[R4]], 0($4)
108 ; CHECK: .size and_v4i32_i
; and_v2i64_i: ANDs the <2 x i64> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.d, ldi.d 1 for the splat, and.v, st.d.
111 define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
112 ; CHECK: and_v2i64_i:
114 %1 = load <2 x i64>* %a
115 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
116 %2 = and <2 x i64> %1, <i64 1, i64 1>
117 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
118 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
119 store <2 x i64> %2, <2 x i64>* %c
120 ; CHECK-DAG: st.d [[R4]], 0($4)
123 ; CHECK: .size and_v2i64_i
; or_v16i8: loads <16 x i8> vectors from %a and %b, ORs them, stores to %c.
; Expected MSA code: two ld.b loads, or.v on the loaded registers, st.b.
126 define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
129 %1 = load <16 x i8>* %a
130 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
131 %2 = load <16 x i8>* %b
132 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
133 %3 = or <16 x i8> %1, %2
134 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
135 store <16 x i8> %3, <16 x i8>* %c
136 ; CHECK-DAG: st.b [[R3]], 0($4)
139 ; CHECK: .size or_v16i8
; or_v8i16: loads <8 x i16> vectors from %a and %b, ORs them, stores to %c.
; Expected MSA code: two ld.h loads, or.v on the loaded registers, st.h.
142 define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
145 %1 = load <8 x i16>* %a
146 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
147 %2 = load <8 x i16>* %b
148 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
149 %3 = or <8 x i16> %1, %2
150 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
151 store <8 x i16> %3, <8 x i16>* %c
152 ; CHECK-DAG: st.h [[R3]], 0($4)
155 ; CHECK: .size or_v8i16
; or_v4i32: loads <4 x i32> vectors from %a and %b, ORs them, stores to %c.
; Expected MSA code: two ld.w loads, or.v on the loaded registers, st.w.
158 define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
161 %1 = load <4 x i32>* %a
162 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
163 %2 = load <4 x i32>* %b
164 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
165 %3 = or <4 x i32> %1, %2
166 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
167 store <4 x i32> %3, <4 x i32>* %c
168 ; CHECK-DAG: st.w [[R3]], 0($4)
171 ; CHECK: .size or_v4i32
; or_v2i64: loads <2 x i64> vectors from %a and %b, ORs them, stores to %c.
; Expected MSA code: two ld.d loads, or.v on the loaded registers, st.d.
174 define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
177 %1 = load <2 x i64>* %a
178 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
179 %2 = load <2 x i64>* %b
180 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
181 %3 = or <2 x i64> %1, %2
182 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
183 store <2 x i64> %3, <2 x i64>* %c
184 ; CHECK-DAG: st.d [[R3]], 0($4)
187 ; CHECK: .size or_v2i64
; or_v16i8_i: ORs the <16 x i8> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.b, ori.b with immediate 1, st.b.
190 define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
193 %1 = load <16 x i8>* %a
194 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
195 %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
196 ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 1
197 store <16 x i8> %2, <16 x i8>* %c
198 ; CHECK-DAG: st.b [[R4]], 0($4)
201 ; CHECK: .size or_v16i8_i
; or_v8i16_i: ORs the <8 x i16> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.h, ldi.h 1 for the splat, or.v, st.h.
204 define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
207 %1 = load <8 x i16>* %a
208 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
209 %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
210 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
211 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
212 store <8 x i16> %2, <8 x i16>* %c
213 ; CHECK-DAG: st.h [[R4]], 0($4)
216 ; CHECK: .size or_v8i16_i
; or_v4i32_i: ORs the <4 x i32> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.w, ldi.w 1 for the splat, or.v, st.w.
219 define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
222 %1 = load <4 x i32>* %a
223 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224 %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
225 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
226 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
227 store <4 x i32> %2, <4 x i32>* %c
228 ; CHECK-DAG: st.w [[R4]], 0($4)
231 ; CHECK: .size or_v4i32_i
; or_v2i64_i: ORs the <2 x i64> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.d, ldi.d 1 for the splat, or.v, st.d.
234 define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
237 %1 = load <2 x i64>* %a
238 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
239 %2 = or <2 x i64> %1, <i64 1, i64 1>
240 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
241 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
242 store <2 x i64> %2, <2 x i64>* %c
243 ; CHECK-DAG: st.d [[R4]], 0($4)
246 ; CHECK: .size or_v2i64_i
; nor_v16i8: ORs the <16 x i8> vectors loaded from %a and %b, inverts the result
; by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: two ld.b loads, a single nor.v, st.b.
249 define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
252 %1 = load <16 x i8>* %a
253 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
254 %2 = load <16 x i8>* %b
255 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
256 %3 = or <16 x i8> %1, %2
257 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
258 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
259 store <16 x i8> %4, <16 x i8>* %c
260 ; CHECK-DAG: st.b [[R3]], 0($4)
263 ; CHECK: .size nor_v16i8
; nor_v8i16: ORs the <8 x i16> vectors loaded from %a and %b, inverts the result
; by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: two ld.h loads, a single nor.v, st.h.
266 define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
269 %1 = load <8 x i16>* %a
270 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
271 %2 = load <8 x i16>* %b
272 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
273 %3 = or <8 x i16> %1, %2
274 %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
275 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
276 store <8 x i16> %4, <8 x i16>* %c
277 ; CHECK-DAG: st.h [[R3]], 0($4)
280 ; CHECK: .size nor_v8i16
; nor_v4i32: ORs the <4 x i32> vectors loaded from %a and %b, inverts the result
; by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: two ld.w loads, a single nor.v, st.w.
283 define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
286 %1 = load <4 x i32>* %a
287 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
288 %2 = load <4 x i32>* %b
289 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
290 %3 = or <4 x i32> %1, %2
291 %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
292 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
293 store <4 x i32> %4, <4 x i32>* %c
294 ; CHECK-DAG: st.w [[R3]], 0($4)
297 ; CHECK: .size nor_v4i32
; nor_v2i64: ORs the <2 x i64> vectors loaded from %a and %b, inverts the result
; by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: two ld.d loads, a single nor.v, st.d.
300 define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
303 %1 = load <2 x i64>* %a
304 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
305 %2 = load <2 x i64>* %b
306 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
307 %3 = or <2 x i64> %1, %2
308 %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
309 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
310 store <2 x i64> %4, <2 x i64>* %c
311 ; CHECK-DAG: st.d [[R3]], 0($4)
314 ; CHECK: .size nor_v2i64
; nor_v16i8_i: ORs the <16 x i8> vector loaded from %a with a splat of 1, inverts
; the result by XOR-ing with all-ones (-1), and stores it to %c.
; The stored register must hold ~(%1 | 1), so the pattern below names nori.b
; explicitly. A bare "ori.b" pattern only passes by matching inside the "nori.b"
; mnemonic and would equally accept a miscompile that drops the inversion.
317 define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
318 ; CHECK: nor_v16i8_i:
320 %1 = load <16 x i8>* %a
321 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
322 %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
323 %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
324 ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
325 store <16 x i8> %3, <16 x i8>* %c
326 ; CHECK-DAG: st.b [[R4]], 0($4)
329 ; CHECK: .size nor_v16i8_i
; nor_v8i16_i: ORs the <8 x i16> vector loaded from %a with a splat of 1, inverts
; the result by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: ld.h, ldi.h 1 for the splat, nor.v, st.h.
332 define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
333 ; CHECK: nor_v8i16_i:
335 %1 = load <8 x i16>* %a
336 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
337 %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
338 %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
339 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
340 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
341 store <8 x i16> %3, <8 x i16>* %c
342 ; CHECK-DAG: st.h [[R4]], 0($4)
345 ; CHECK: .size nor_v8i16_i
; nor_v4i32_i: ORs the <4 x i32> vector loaded from %a with a splat of 1, inverts
; the result by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: ld.w, ldi.w 1 for the splat, nor.v, st.w.
348 define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
349 ; CHECK: nor_v4i32_i:
351 %1 = load <4 x i32>* %a
352 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
353 %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
354 %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
355 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
356 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
357 store <4 x i32> %3, <4 x i32>* %c
358 ; CHECK-DAG: st.w [[R4]], 0($4)
361 ; CHECK: .size nor_v4i32_i
; nor_v2i64_i: ORs the <2 x i64> vector loaded from %a with a splat of 1, inverts
; the result by XOR-ing with all-ones (-1), and stores to %c.
; Expected MSA code: ld.d, ldi.d 1 for the splat, nor.v, st.d.
364 define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
365 ; CHECK: nor_v2i64_i:
367 %1 = load <2 x i64>* %a
368 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
369 %2 = or <2 x i64> %1, <i64 1, i64 1>
370 %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
371 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
372 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
373 store <2 x i64> %3, <2 x i64>* %c
374 ; CHECK-DAG: st.d [[R4]], 0($4)
377 ; CHECK: .size nor_v2i64_i
; xor_v16i8: loads <16 x i8> vectors from %a and %b, XORs them, stores to %c.
; Expected MSA code: two ld.b loads, xor.v on the loaded registers, st.b.
380 define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
383 %1 = load <16 x i8>* %a
384 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
385 %2 = load <16 x i8>* %b
386 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
387 %3 = xor <16 x i8> %1, %2
388 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
389 store <16 x i8> %3, <16 x i8>* %c
390 ; CHECK-DAG: st.b [[R3]], 0($4)
393 ; CHECK: .size xor_v16i8
; xor_v8i16: loads <8 x i16> vectors from %a and %b, XORs them, stores to %c.
; Expected MSA code: two ld.h loads, xor.v on the loaded registers, st.h.
396 define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
399 %1 = load <8 x i16>* %a
400 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
401 %2 = load <8 x i16>* %b
402 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
403 %3 = xor <8 x i16> %1, %2
404 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
405 store <8 x i16> %3, <8 x i16>* %c
406 ; CHECK-DAG: st.h [[R3]], 0($4)
409 ; CHECK: .size xor_v8i16
; xor_v4i32: loads <4 x i32> vectors from %a and %b, XORs them, stores to %c.
; Expected MSA code: two ld.w loads, xor.v on the loaded registers, st.w.
412 define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
415 %1 = load <4 x i32>* %a
416 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
417 %2 = load <4 x i32>* %b
418 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
419 %3 = xor <4 x i32> %1, %2
420 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
421 store <4 x i32> %3, <4 x i32>* %c
422 ; CHECK-DAG: st.w [[R3]], 0($4)
425 ; CHECK: .size xor_v4i32
; xor_v2i64: loads <2 x i64> vectors from %a and %b, XORs them, stores to %c.
; Expected MSA code: two ld.d loads, xor.v on the loaded registers, st.d.
428 define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
431 %1 = load <2 x i64>* %a
432 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
433 %2 = load <2 x i64>* %b
434 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
435 %3 = xor <2 x i64> %1, %2
436 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
437 store <2 x i64> %3, <2 x i64>* %c
438 ; CHECK-DAG: st.d [[R3]], 0($4)
441 ; CHECK: .size xor_v2i64
; xor_v16i8_i: XORs the <16 x i8> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.b, xori.b with immediate 1, st.b.
444 define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
445 ; CHECK: xor_v16i8_i:
447 %1 = load <16 x i8>* %a
448 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
449 %2 = xor <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
450 ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 1
451 store <16 x i8> %2, <16 x i8>* %c
452 ; CHECK-DAG: st.b [[R4]], 0($4)
455 ; CHECK: .size xor_v16i8_i
; xor_v8i16_i: XORs the <8 x i16> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.h, ldi.h 1 for the splat, xor.v, st.h.
458 define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
459 ; CHECK: xor_v8i16_i:
461 %1 = load <8 x i16>* %a
462 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
463 %2 = xor <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
464 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
465 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
466 store <8 x i16> %2, <8 x i16>* %c
467 ; CHECK-DAG: st.h [[R4]], 0($4)
470 ; CHECK: .size xor_v8i16_i
; xor_v4i32_i: XORs the <4 x i32> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.w, ldi.w 1 for the splat, xor.v, st.w.
473 define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
474 ; CHECK: xor_v4i32_i:
476 %1 = load <4 x i32>* %a
477 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
478 %2 = xor <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
479 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
480 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
481 store <4 x i32> %2, <4 x i32>* %c
482 ; CHECK-DAG: st.w [[R4]], 0($4)
485 ; CHECK: .size xor_v4i32_i
; xor_v2i64_i: XORs the <2 x i64> vector loaded from %a with a splat of 1 and
; stores to %c. Expected MSA code: ld.d, ldi.d 1 for the splat, xor.v, st.d.
488 define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
489 ; CHECK: xor_v2i64_i:
491 %1 = load <2 x i64>* %a
492 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
493 %2 = xor <2 x i64> %1, <i64 1, i64 1>
494 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
495 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
496 store <2 x i64> %2, <2 x i64>* %c
497 ; CHECK-DAG: st.d [[R4]], 0($4)
500 ; CHECK: .size xor_v2i64_i
; sll_v16i8: shifts the <16 x i8> vector from %a left (shl) by the per-element
; amounts loaded from %b and stores to %c. Expected: two ld.b, sll.b, st.b.
503 define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
506 %1 = load <16 x i8>* %a
507 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
508 %2 = load <16 x i8>* %b
509 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
510 %3 = shl <16 x i8> %1, %2
511 ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
512 store <16 x i8> %3, <16 x i8>* %c
513 ; CHECK-DAG: st.b [[R3]], 0($4)
516 ; CHECK: .size sll_v16i8
; sll_v8i16: shifts the <8 x i16> vector from %a left (shl) by the per-element
; amounts loaded from %b and stores to %c. Expected: two ld.h, sll.h, st.h.
519 define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
522 %1 = load <8 x i16>* %a
523 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
524 %2 = load <8 x i16>* %b
525 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
526 %3 = shl <8 x i16> %1, %2
527 ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
528 store <8 x i16> %3, <8 x i16>* %c
529 ; CHECK-DAG: st.h [[R3]], 0($4)
532 ; CHECK: .size sll_v8i16
; sll_v4i32: shifts the <4 x i32> vector from %a left (shl) by the per-element
; amounts loaded from %b and stores to %c. Expected: two ld.w, sll.w, st.w.
535 define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
538 %1 = load <4 x i32>* %a
539 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
540 %2 = load <4 x i32>* %b
541 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
542 %3 = shl <4 x i32> %1, %2
543 ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
544 store <4 x i32> %3, <4 x i32>* %c
545 ; CHECK-DAG: st.w [[R3]], 0($4)
548 ; CHECK: .size sll_v4i32
; sll_v2i64: shifts the <2 x i64> vector from %a left (shl) by the per-element
; amounts loaded from %b and stores to %c. Expected: two ld.d, sll.d, st.d.
551 define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
554 %1 = load <2 x i64>* %a
555 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
556 %2 = load <2 x i64>* %b
557 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
558 %3 = shl <2 x i64> %1, %2
559 ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
560 store <2 x i64> %3, <2 x i64>* %c
561 ; CHECK-DAG: st.d [[R3]], 0($4)
564 ; CHECK: .size sll_v2i64
; sll_v16i8_i: shifts every element of the <16 x i8> vector from %a left by the
; constant 1 and stores to %c. Expected MSA code: ld.b, slli.b by 1, st.b.
567 define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
568 ; CHECK: sll_v16i8_i:
570 %1 = load <16 x i8>* %a
571 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
572 %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
573 ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
574 store <16 x i8> %2, <16 x i8>* %c
575 ; CHECK-DAG: st.b [[R4]], 0($4)
578 ; CHECK: .size sll_v16i8_i
; sll_v8i16_i: shifts every element of the <8 x i16> vector from %a left by the
; constant 1 and stores to %c. Expected MSA code: ld.h, slli.h by 1, st.h.
581 define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
582 ; CHECK: sll_v8i16_i:
584 %1 = load <8 x i16>* %a
585 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
586 %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
587 ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
588 store <8 x i16> %2, <8 x i16>* %c
589 ; CHECK-DAG: st.h [[R4]], 0($4)
592 ; CHECK: .size sll_v8i16_i
; sll_v4i32_i: shifts every element of the <4 x i32> vector from %a left by the
; constant 1 and stores to %c. Expected MSA code: ld.w, slli.w by 1, st.w.
595 define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
596 ; CHECK: sll_v4i32_i:
598 %1 = load <4 x i32>* %a
599 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
600 %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
601 ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
602 store <4 x i32> %2, <4 x i32>* %c
603 ; CHECK-DAG: st.w [[R4]], 0($4)
606 ; CHECK: .size sll_v4i32_i
; sll_v2i64_i: shifts every element of the <2 x i64> vector from %a left by the
; constant 1 and stores to %c. Expected MSA code: ld.d, slli.d by 1, st.d.
609 define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
610 ; CHECK: sll_v2i64_i:
612 %1 = load <2 x i64>* %a
613 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
614 %2 = shl <2 x i64> %1, <i64 1, i64 1>
615 ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
616 store <2 x i64> %2, <2 x i64>* %c
617 ; CHECK-DAG: st.d [[R4]], 0($4)
620 ; CHECK: .size sll_v2i64_i
; sra_v16i8: arithmetic right shift (ashr) of the <16 x i8> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.b, sra.b, st.b.
623 define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
626 %1 = load <16 x i8>* %a
627 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
628 %2 = load <16 x i8>* %b
629 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
630 %3 = ashr <16 x i8> %1, %2
631 ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
632 store <16 x i8> %3, <16 x i8>* %c
633 ; CHECK-DAG: st.b [[R3]], 0($4)
636 ; CHECK: .size sra_v16i8
; sra_v8i16: arithmetic right shift (ashr) of the <8 x i16> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.h, sra.h, st.h.
639 define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
642 %1 = load <8 x i16>* %a
643 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
644 %2 = load <8 x i16>* %b
645 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
646 %3 = ashr <8 x i16> %1, %2
647 ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
648 store <8 x i16> %3, <8 x i16>* %c
649 ; CHECK-DAG: st.h [[R3]], 0($4)
652 ; CHECK: .size sra_v8i16
; sra_v4i32: arithmetic right shift (ashr) of the <4 x i32> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.w, sra.w, st.w.
655 define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
658 %1 = load <4 x i32>* %a
659 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
660 %2 = load <4 x i32>* %b
661 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
662 %3 = ashr <4 x i32> %1, %2
663 ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
664 store <4 x i32> %3, <4 x i32>* %c
665 ; CHECK-DAG: st.w [[R3]], 0($4)
668 ; CHECK: .size sra_v4i32
; sra_v2i64: arithmetic right shift (ashr) of the <2 x i64> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.d, sra.d, st.d.
671 define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
674 %1 = load <2 x i64>* %a
675 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
676 %2 = load <2 x i64>* %b
677 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
678 %3 = ashr <2 x i64> %1, %2
679 ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
680 store <2 x i64> %3, <2 x i64>* %c
681 ; CHECK-DAG: st.d [[R3]], 0($4)
684 ; CHECK: .size sra_v2i64
; sra_v16i8_i: arithmetic right shift (ashr) of the <16 x i8> vector from %a by
; the constant 1, stored to %c. Expected MSA code: ld.b, srai.b by 1, st.b.
687 define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
688 ; CHECK: sra_v16i8_i:
690 %1 = load <16 x i8>* %a
691 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
692 %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
693 ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
694 store <16 x i8> %2, <16 x i8>* %c
695 ; CHECK-DAG: st.b [[R4]], 0($4)
698 ; CHECK: .size sra_v16i8_i
; sra_v8i16_i: arithmetic right shift (ashr) of the <8 x i16> vector from %a by
; the constant 1, stored to %c. Expected MSA code: ld.h, srai.h by 1, st.h.
701 define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
702 ; CHECK: sra_v8i16_i:
704 %1 = load <8 x i16>* %a
705 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
706 %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
707 ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
708 store <8 x i16> %2, <8 x i16>* %c
709 ; CHECK-DAG: st.h [[R4]], 0($4)
712 ; CHECK: .size sra_v8i16_i
; sra_v4i32_i: arithmetic right shift (ashr) of the <4 x i32> vector from %a by
; the constant 1, stored to %c. Expected MSA code: ld.w, srai.w by 1, st.w.
715 define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
716 ; CHECK: sra_v4i32_i:
718 %1 = load <4 x i32>* %a
719 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
720 %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
721 ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
722 store <4 x i32> %2, <4 x i32>* %c
723 ; CHECK-DAG: st.w [[R4]], 0($4)
726 ; CHECK: .size sra_v4i32_i
; sra_v2i64_i: arithmetic right shift (ashr) of the <2 x i64> vector from %a by
; the constant 1, stored to %c. Expected MSA code: ld.d, srai.d by 1, st.d.
729 define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
730 ; CHECK: sra_v2i64_i:
732 %1 = load <2 x i64>* %a
733 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
734 %2 = ashr <2 x i64> %1, <i64 1, i64 1>
735 ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
736 store <2 x i64> %2, <2 x i64>* %c
737 ; CHECK-DAG: st.d [[R4]], 0($4)
740 ; CHECK: .size sra_v2i64_i
; srl_v16i8: logical right shift (lshr) of the <16 x i8> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.b, srl.b, st.b.
743 define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
746 %1 = load <16 x i8>* %a
747 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
748 %2 = load <16 x i8>* %b
749 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
750 %3 = lshr <16 x i8> %1, %2
751 ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
752 store <16 x i8> %3, <16 x i8>* %c
753 ; CHECK-DAG: st.b [[R3]], 0($4)
756 ; CHECK: .size srl_v16i8
; srl_v8i16: logical right shift (lshr) of the <8 x i16> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.h, srl.h, st.h.
759 define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
762 %1 = load <8 x i16>* %a
763 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
764 %2 = load <8 x i16>* %b
765 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
766 %3 = lshr <8 x i16> %1, %2
767 ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
768 store <8 x i16> %3, <8 x i16>* %c
769 ; CHECK-DAG: st.h [[R3]], 0($4)
772 ; CHECK: .size srl_v8i16
; srl_v4i32: logical right shift (lshr) of the <4 x i32> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.w, srl.w, st.w.
775 define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
778 %1 = load <4 x i32>* %a
779 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
780 %2 = load <4 x i32>* %b
781 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
782 %3 = lshr <4 x i32> %1, %2
783 ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
784 store <4 x i32> %3, <4 x i32>* %c
785 ; CHECK-DAG: st.w [[R3]], 0($4)
788 ; CHECK: .size srl_v4i32
; srl_v2i64: logical right shift (lshr) of the <2 x i64> vector from %a by the
; per-element amounts loaded from %b, stored to %c. Expected: two ld.d, srl.d, st.d.
791 define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
794 %1 = load <2 x i64>* %a
795 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
796 %2 = load <2 x i64>* %b
797 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
798 %3 = lshr <2 x i64> %1, %2
799 ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
800 store <2 x i64> %3, <2 x i64>* %c
801 ; CHECK-DAG: st.d [[R3]], 0($4)
804 ; CHECK: .size srl_v2i64
; srl_v16i8_i: logical right shift (lshr) of the <16 x i8> vector from %a by the
; constant 1, stored to %c. Expected MSA code: ld.b, srli.b by 1, st.b.
807 define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
808 ; CHECK: srl_v16i8_i:
810 %1 = load <16 x i8>* %a
811 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
812 %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
813 ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
814 store <16 x i8> %2, <16 x i8>* %c
815 ; CHECK-DAG: st.b [[R4]], 0($4)
818 ; CHECK: .size srl_v16i8_i
; srl_v8i16_i: logical right shift (lshr) of the <8 x i16> vector from %a by the
; constant 1, stored to %c. Expected MSA code: ld.h, srli.h by 1, st.h.
821 define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
822 ; CHECK: srl_v8i16_i:
824 %1 = load <8 x i16>* %a
825 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
826 %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
827 ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
828 store <8 x i16> %2, <8 x i16>* %c
829 ; CHECK-DAG: st.h [[R4]], 0($4)
832 ; CHECK: .size srl_v8i16_i
; srl_v4i32_i: logical right shift (lshr) of the <4 x i32> vector from %a by the
; constant 1, stored to %c. Expected MSA code: ld.w, srli.w by 1, st.w.
835 define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
836 ; CHECK: srl_v4i32_i:
838 %1 = load <4 x i32>* %a
839 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
840 %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
841 ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
842 store <4 x i32> %2, <4 x i32>* %c
843 ; CHECK-DAG: st.w [[R4]], 0($4)
846 ; CHECK: .size srl_v4i32_i
; srl_v2i64_i: logical right shift (lshr) of the <2 x i64> vector from %a by the
; constant 1, stored to %c. Expected MSA code: ld.d, srli.d by 1, st.d.
849 define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
850 ; CHECK: srl_v2i64_i:
852 %1 = load <2 x i64>* %a
853 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
854 %2 = lshr <2 x i64> %1, <i64 1, i64 1>
855 ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
856 store <2 x i64> %2, <2 x i64>* %c
857 ; CHECK-DAG: st.d [[R4]], 0($4)
860 ; CHECK: .size srl_v2i64_i
; ctpop_v16i8: calls @llvm.ctpop.v16i8 on the <16 x i8> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.b, pcnt.b, st.b.
863 define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
864 ; CHECK: ctpop_v16i8:
866 %1 = load <16 x i8>* %a
867 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
868 %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
869 ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
870 store <16 x i8> %2, <16 x i8>* %c
871 ; CHECK-DAG: st.b [[R3]], 0($4)
874 ; CHECK: .size ctpop_v16i8
; ctpop_v8i16: calls @llvm.ctpop.v8i16 on the <8 x i16> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.h, pcnt.h, st.h.
877 define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
878 ; CHECK: ctpop_v8i16:
880 %1 = load <8 x i16>* %a
881 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
882 %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
883 ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
884 store <8 x i16> %2, <8 x i16>* %c
885 ; CHECK-DAG: st.h [[R3]], 0($4)
888 ; CHECK: .size ctpop_v8i16
; ctpop_v4i32: calls @llvm.ctpop.v4i32 on the <4 x i32> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.w, pcnt.w, st.w.
891 define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
892 ; CHECK: ctpop_v4i32:
894 %1 = load <4 x i32>* %a
895 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
896 %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
897 ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
898 store <4 x i32> %2, <4 x i32>* %c
899 ; CHECK-DAG: st.w [[R3]], 0($4)
902 ; CHECK: .size ctpop_v4i32
; ctpop_v2i64: calls @llvm.ctpop.v2i64 on the <2 x i64> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.d, pcnt.d, st.d.
905 define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
906 ; CHECK: ctpop_v2i64:
908 %1 = load <2 x i64>* %a
909 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
910 %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
911 ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
912 store <2 x i64> %2, <2 x i64>* %c
913 ; CHECK-DAG: st.d [[R3]], 0($4)
916 ; CHECK: .size ctpop_v2i64
; ctlz_v16i8: calls @llvm.ctlz.v16i8 on the <16 x i8> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.b, nlzc.b, st.b.
919 define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
922 %1 = load <16 x i8>* %a
923 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
924 %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
925 ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
926 store <16 x i8> %2, <16 x i8>* %c
927 ; CHECK-DAG: st.b [[R3]], 0($4)
930 ; CHECK: .size ctlz_v16i8
; ctlz_v8i16: calls @llvm.ctlz.v8i16 on the <8 x i16> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.h, nlzc.h, st.h.
933 define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
936 %1 = load <8 x i16>* %a
937 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
938 %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
939 ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
940 store <8 x i16> %2, <8 x i16>* %c
941 ; CHECK-DAG: st.h [[R3]], 0($4)
944 ; CHECK: .size ctlz_v8i16
; ctlz_v4i32: calls @llvm.ctlz.v4i32 on the <4 x i32> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.w, nlzc.w, st.w.
947 define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
950 %1 = load <4 x i32>* %a
951 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
952 %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
953 ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
954 store <4 x i32> %2, <4 x i32>* %c
955 ; CHECK-DAG: st.w [[R3]], 0($4)
958 ; CHECK: .size ctlz_v4i32
; ctlz_v2i64: calls @llvm.ctlz.v2i64 on the <2 x i64> vector loaded from %a and
; stores the result to %c. Expected MSA code: ld.d, nlzc.d, st.d.
961 define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
964 %1 = load <2 x i64>* %a
965 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
966 %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
967 ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
968 store <2 x i64> %2, <2 x i64>* %c
969 ; CHECK-DAG: st.d [[R3]], 0($4)
972 ; CHECK: .size ctlz_v2i64
; Declarations of the vector population-count (ctpop) and leading-zero-count
; (ctlz) intrinsics called by the ctpop_* and ctlz_* functions in this file.
; NOTE(review): llvm.ctlz is declared and called here with a single vector
; operand; the current LLVM Language Reference gives it a second i1 operand.
; Presumably this older form relies on bitcode/IR auto-upgrade - confirm it is
; still accepted by the toolchain this test runs against.
975 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
976 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
977 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
978 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
979 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
980 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
981 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
982 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)