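; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
;
; Checks instruction selection for MSA floating-point vector comparisons,
; compare-driven selects, and the fmax/fmin intrinsics, on both endiannesses.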
; MSA floating-point max/min intrinsics exercised by the max_* / min_* tests.
declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind
declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind
define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp false on <4 x float>: the result is a constant, so the expected code
  ; just materialises an all-zeros vector and stores it through %c.
  ; CHECK-LABEL: false_v4f32:

  %1 = load <4 x float>* %a
  %2 = load <4 x float>* %b
  %3 = fcmp false <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  store <4 x i32> %4, <4 x i32>* %c
  ret void

  ; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf:
  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
  ; CHECK-DAG: st.w [[R1]], 0($4)
  ; CHECK: .size false_v4f32
}
define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp false on <2 x double>: the result is a constant zero mask, but the
  ; expected code still sign-extends it with a shift-left/shift-right pair.
  ; CHECK-LABEL: false_v2f64:

  %1 = load <2 x double>* %a
  %2 = load <2 x double>* %b
  %3 = fcmp false <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  store <2 x i64> %4, <2 x i64>* %c
  ret void

  ; FIXME: This code is correct, but poor. Ideally it would be similar to
  ;        the code in @false_v4f32
  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ; CHECK: .size false_v2f64
}
define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp oeq (ordered and equal) on <4 x float>: the sign-extended mask is
  ; expected to come straight out of fceq.w and be stored unchanged.
  ; CHECK-LABEL: oeq_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp oeq <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size oeq_v4f32
}
define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp oeq (ordered and equal) on <2 x double>: the sign-extended mask is
  ; expected to come straight out of fceq.d and be stored unchanged.
  ; CHECK-LABEL: oeq_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp oeq <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size oeq_v2f64
}
define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp oge (ordered and greater-or-equal) on <4 x float>: expected to be
  ; selected as fcle.w with the operands swapped (b <= a).
  ; CHECK-LABEL: oge_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp oge <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size oge_v4f32
}
define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp oge (ordered and greater-or-equal) on <2 x double>: expected to be
  ; selected as fcle.d with the operands swapped (b <= a).
  ; CHECK-LABEL: oge_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp oge <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size oge_v2f64
}
define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ogt (ordered and greater-than) on <4 x float>: expected to be
  ; selected as fclt.w with the operands swapped (b < a).
  ; CHECK-LABEL: ogt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ogt <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ogt_v4f32
}
define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ogt (ordered and greater-than) on <2 x double>: expected to be
  ; selected as fclt.d with the operands swapped (b < a).
  ; CHECK-LABEL: ogt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ogt <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ogt_v2f64
}
define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ole (ordered and less-or-equal) on <4 x float>: expected to map
  ; directly onto fcle.w with the operands in source order.
  ; CHECK-LABEL: ole_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ole <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ole_v4f32
}
define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ole (ordered and less-or-equal) on <2 x double>: expected to map
  ; directly onto fcle.d with the operands in source order.
  ; CHECK-LABEL: ole_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ole <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ole_v2f64
}
define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp olt (ordered and less-than) on <4 x float>: expected to map
  ; directly onto fclt.w with the operands in source order.
  ; CHECK-LABEL: olt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp olt <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size olt_v4f32
}
define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp olt (ordered and less-than) on <2 x double>: expected to map
  ; directly onto fclt.d with the operands in source order.
  ; CHECK-LABEL: olt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp olt <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size olt_v2f64
}
define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp one (ordered and not-equal) on <4 x float>: expected to map
  ; directly onto fcne.w.
  ; CHECK-LABEL: one_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp one <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcne.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size one_v4f32
}
define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp one (ordered and not-equal) on <2 x double>: expected to map
  ; directly onto fcne.d.
  ; CHECK-LABEL: one_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp one <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcne.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size one_v2f64
}
define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ord (neither operand is NaN) on <4 x float>: expected to map
  ; directly onto fcor.w.
  ; CHECK-LABEL: ord_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ord <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcor.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ord_v4f32
}
define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ord (neither operand is NaN) on <2 x double>: expected to map
  ; directly onto fcor.d.
  ; CHECK-LABEL: ord_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ord <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcor.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ord_v2f64
}
define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ueq (unordered or equal) on <4 x float>: expected to map directly
  ; onto fcueq.w.
  ; CHECK-LABEL: ueq_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ueq <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcueq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ueq_v4f32
}
define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ueq (unordered or equal) on <2 x double>: expected to map directly
  ; onto fcueq.d.
  ; CHECK-LABEL: ueq_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ueq <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcueq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ueq_v2f64
}
define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp uge (unordered or greater-or-equal) on <4 x float>: expected to be
  ; selected as fcule.w with the operands swapped (b <= a).
  ; CHECK-LABEL: uge_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp uge <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size uge_v4f32
}
define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp uge (unordered or greater-or-equal) on <2 x double>: expected to be
  ; selected as fcule.d with the operands swapped (b <= a).
  ; CHECK-LABEL: uge_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp uge <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size uge_v2f64
}
define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ugt (unordered or greater-than) on <4 x float>: expected to be
  ; selected as fcult.w with the operands swapped (b < a).
  ; CHECK-LABEL: ugt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ugt <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ugt_v4f32
}
define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ugt (unordered or greater-than) on <2 x double>: expected to be
  ; selected as fcult.d with the operands swapped (b < a).
  ; CHECK-LABEL: ugt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ugt <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ugt_v2f64
}
define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ule (unordered or less-or-equal) on <4 x float>: expected to map
  ; directly onto fcule.w with the operands in source order.
  ; CHECK-LABEL: ule_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ule <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ule_v4f32
}
define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ule (unordered or less-or-equal) on <2 x double>: expected to map
  ; directly onto fcule.d with the operands in source order.
  ; CHECK-LABEL: ule_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ule <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ule_v2f64
}
define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp ult (unordered or less-than) on <4 x float>: expected to map
  ; directly onto fcult.w with the operands in source order.
  ; CHECK-LABEL: ult_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ult <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size ult_v4f32
}
define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp ult (unordered or less-than) on <2 x double>: expected to map
  ; directly onto fcult.d with the operands in source order.
  ; CHECK-LABEL: ult_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ult <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size ult_v2f64
}
define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp uno (either operand is NaN) on <4 x float>: expected to map
  ; directly onto fcun.w.
  ; CHECK-LABEL: uno_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp uno <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: fcun.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size uno_v4f32
}
define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp uno (either operand is NaN) on <2 x double>: expected to map
  ; directly onto fcun.d.
  ; CHECK-LABEL: uno_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp uno <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: fcun.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size uno_v2f64
}
define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; fcmp true on <4 x float>: the result is a constant, so the expected code
  ; just materialises an all-ones vector (ldi.b -1) and stores it through %c.
  ; CHECK-LABEL: true_v4f32:

  %1 = load <4 x float>* %a
  %2 = load <4 x float>* %b
  %3 = fcmp true <4 x float> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  store <4 x i32> %4, <4 x i32>* %c
  ret void

  ; (setcc $a, $b, SETTRUE) is always folded, so no compare instruction is
  ; emitted:
  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1
  ; CHECK-DAG: st.w [[R1]], 0($4)
  ; CHECK: .size true_v4f32
}
define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; fcmp true on <2 x double>: the result is a constant, but the expected
  ; code builds the all-ones mask from the value 1 with a shift-left /
  ; arithmetic-shift-right pair instead of loading it directly.
  ; CHECK-LABEL: true_v2f64:

  %1 = load <2 x double>* %a
  %2 = load <2 x double>* %b
  %3 = fcmp true <2 x double> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  store <2 x i64> %4, <2 x i64>* %c
  ret void

  ; FIXME: This code is correct, but poor. Ideally it would be similar to
  ;        the code in @true_v4f32
  ; CHECK-DAG: ldi.d [[R1:\$w[0-9]+]], 1
  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ; CHECK: .size true_v2f64
}
define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                        <4 x float>* %c) nounwind {
  ; select driven by an fcmp ogt mask on <4 x float>: expected to become
  ; fclt.w (operands swapped) feeding bsel.v, with the compare result
  ; register reused as the bsel.v destination that is then stored to %d.
  ; CHECK-LABEL: bsel_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fcmp ogt <4 x float> %1, %2
  ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void

  ; CHECK: .size bsel_v4f32
}
define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                        <2 x double>* %c) nounwind {
  ; select driven by an fcmp ogt mask on <2 x double>: expected to become
  ; fclt.d (operands swapped) feeding bsel.v, with the compare result
  ; register reused as the bsel.v destination that is then stored to %d.
  ; CHECK-LABEL: bsel_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fcmp ogt <2 x double> %1, %2
  ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void

  ; CHECK: .size bsel_v2f64
}
define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; select between %a's value and a zeroinitializer vector, driven by an
  ; fcmp ogt mask on <4 x float>. The expected code still uses bsel.v with
  ; some third vector register (captured as R3) for the zero operand.
  ; Note: the %c argument is not used by this function.
  ; CHECK-LABEL: bseli_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ogt <4 x float> %1, %2
  ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R4]], 0($4)
  ret void

  ; CHECK: .size bseli_v4f32
}
define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; select between %a's value and a zeroinitializer vector, driven by an
  ; fcmp ogt mask on <2 x double>. The expected code still uses bsel.v with
  ; some third vector register (captured as R3) for the zero operand.
  ; Note: the %c argument is not used by this function.
  ; CHECK-LABEL: bseli_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fcmp ogt <2 x double> %1, %2
  ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R4]], 0($4)
  ret void

  ; CHECK: .size bseli_v2f64
}
define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; Call to the llvm.mips.fmax.w intrinsic on <4 x float>: expected to be
  ; selected as a single fmax.w whose result is stored to %c.
  ; CHECK-LABEL: max_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2)
  ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size max_v4f32
}
define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; Call to the llvm.mips.fmax.d intrinsic on <2 x double>: expected to be
  ; selected as a single fmax.d whose result is stored to %c.
  ; CHECK-LABEL: max_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2)
  ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size max_v2f64
}
define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; Call to the llvm.mips.fmin.w intrinsic on <4 x float>: expected to be
  ; selected as a single fmin.w whose result is stored to %c.
  ; CHECK-LABEL: min_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2)
  ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)
  ret void

  ; CHECK: .size min_v4f32
}
define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; Call to the llvm.mips.fmin.d intrinsic on <2 x double>: expected to be
  ; selected as a single fmin.d whose result is stored to %c.
  ; CHECK-LABEL: min_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2)
  ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)
  ret void

  ; CHECK: .size min_v2f64
}