1 ; Test the bitcast operation for big-endian and little-endian.
3 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s
4 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s
6 define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
8 %0 = load volatile <16 x i8>* %src
9 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
10 %2 = bitcast <16 x i8> %1 to <16 x i8>
11 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
12 store <16 x i8> %3, <16 x i8>* %dst
16 ; LITENDIAN: v16i8_to_v16i8:
17 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
18 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
19 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
20 ; LITENDIAN: st.b [[R3]],
21 ; LITENDIAN: .size v16i8_to_v16i8
23 ; BIGENDIAN: v16i8_to_v16i8:
24 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
25 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
26 ; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
27 ; BIGENDIAN: st.b [[R3]],
28 ; BIGENDIAN: .size v16i8_to_v16i8
30 define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
32 %0 = load volatile <16 x i8>* %src
33 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
34 %2 = bitcast <16 x i8> %1 to <8 x i16>
35 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
36 store <8 x i16> %3, <8 x i16>* %dst
40 ; LITENDIAN: v16i8_to_v8i16:
41 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
42 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
43 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
44 ; LITENDIAN: st.h [[R3]],
45 ; LITENDIAN: .size v16i8_to_v8i16
47 ; BIGENDIAN: v16i8_to_v8i16:
48 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
49 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
50 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
51 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
52 ; BIGENDIAN: st.h [[R4]],
53 ; BIGENDIAN: .size v16i8_to_v8i16
55 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
56 ; are no operations for v8f16 to put in the way.
57 define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
59 %0 = load volatile <16 x i8>* %src
60 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
61 %2 = bitcast <16 x i8> %1 to <8 x half>
62 store <8 x half> %2, <8 x half>* %dst
66 ; LITENDIAN: v16i8_to_v8f16:
67 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
68 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
69 ; LITENDIAN: st.b [[R2]],
70 ; LITENDIAN: .size v16i8_to_v8f16
72 ; BIGENDIAN: v16i8_to_v8f16:
73 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
74 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
75 ; BIGENDIAN: st.b [[R2]],
76 ; BIGENDIAN: .size v16i8_to_v8f16
78 define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
80 %0 = load volatile <16 x i8>* %src
81 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
82 %2 = bitcast <16 x i8> %1 to <4 x i32>
83 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
84 store <4 x i32> %3, <4 x i32>* %dst
88 ; LITENDIAN: v16i8_to_v4i32:
89 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
90 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
91 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
92 ; LITENDIAN: st.w [[R3]],
93 ; LITENDIAN: .size v16i8_to_v4i32
95 ; BIGENDIAN: v16i8_to_v4i32:
96 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
97 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
98 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
99 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
100 ; BIGENDIAN: st.w [[R4]],
101 ; BIGENDIAN: .size v16i8_to_v4i32
103 define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
105 %0 = load volatile <16 x i8>* %src
106 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
107 %2 = bitcast <16 x i8> %1 to <4 x float>
108 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
109 store <4 x float> %3, <4 x float>* %dst
113 ; LITENDIAN: v16i8_to_v4f32:
114 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
115 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
116 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
117 ; LITENDIAN: st.w [[R3]],
118 ; LITENDIAN: .size v16i8_to_v4f32
120 ; BIGENDIAN: v16i8_to_v4f32:
121 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
122 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
123 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
124 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
125 ; BIGENDIAN: st.w [[R4]],
126 ; BIGENDIAN: .size v16i8_to_v4f32
128 define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
130 %0 = load volatile <16 x i8>* %src
131 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
132 %2 = bitcast <16 x i8> %1 to <2 x i64>
133 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
134 store <2 x i64> %3, <2 x i64>* %dst
138 ; LITENDIAN: v16i8_to_v2i64:
139 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
140 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
141 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
142 ; LITENDIAN: st.d [[R3]],
143 ; LITENDIAN: .size v16i8_to_v2i64
145 ; BIGENDIAN: v16i8_to_v2i64:
146 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
147 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
148 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
149 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
150 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
151 ; BIGENDIAN: st.d [[R4]],
152 ; BIGENDIAN: .size v16i8_to_v2i64
154 define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
156 %0 = load volatile <16 x i8>* %src
157 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
158 %2 = bitcast <16 x i8> %1 to <2 x double>
159 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
160 store <2 x double> %3, <2 x double>* %dst
164 ; LITENDIAN: v16i8_to_v2f64:
165 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
166 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
167 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
168 ; LITENDIAN: st.d [[R3]],
169 ; LITENDIAN: .size v16i8_to_v2f64
171 ; BIGENDIAN: v16i8_to_v2f64:
172 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
173 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
174 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
175 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
176 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
177 ; BIGENDIAN: st.d [[R4]],
178 ; BIGENDIAN: .size v16i8_to_v2f64
180 define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
182 %0 = load volatile <8 x i16>* %src
183 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
184 %2 = bitcast <8 x i16> %1 to <16 x i8>
185 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
186 store <16 x i8> %3, <16 x i8>* %dst
190 ; LITENDIAN: v8i16_to_v16i8:
191 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
192 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
193 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
194 ; LITENDIAN: st.b [[R3]],
195 ; LITENDIAN: .size v8i16_to_v16i8
197 ; BIGENDIAN: v8i16_to_v16i8:
198 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
199 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
200 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
201 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
202 ; BIGENDIAN: st.b [[R4]],
203 ; BIGENDIAN: .size v8i16_to_v16i8
205 define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
207 %0 = load volatile <8 x i16>* %src
208 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
209 %2 = bitcast <8 x i16> %1 to <8 x i16>
210 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
211 store <8 x i16> %3, <8 x i16>* %dst
215 ; LITENDIAN: v8i16_to_v8i16:
216 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
217 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
218 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
219 ; LITENDIAN: st.h [[R3]],
220 ; LITENDIAN: .size v8i16_to_v8i16
222 ; BIGENDIAN: v8i16_to_v8i16:
223 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
224 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
225 ; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
226 ; BIGENDIAN: st.h [[R3]],
227 ; BIGENDIAN: .size v8i16_to_v8i16
229 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
230 ; are no operations for v8f16 to put in the way.
231 define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
233 %0 = load volatile <8 x i16>* %src
234 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
235 %2 = bitcast <8 x i16> %1 to <8 x half>
236 store <8 x half> %2, <8 x half>* %dst
240 ; LITENDIAN: v8i16_to_v8f16:
241 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
242 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
243 ; LITENDIAN: st.h [[R2]],
244 ; LITENDIAN: .size v8i16_to_v8f16
246 ; BIGENDIAN: v8i16_to_v8f16:
247 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
248 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
249 ; BIGENDIAN: st.h [[R2]],
250 ; BIGENDIAN: .size v8i16_to_v8f16
252 define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
254 %0 = load volatile <8 x i16>* %src
255 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
256 %2 = bitcast <8 x i16> %1 to <4 x i32>
257 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
258 store <4 x i32> %3, <4 x i32>* %dst
262 ; LITENDIAN: v8i16_to_v4i32:
263 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
264 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
265 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
266 ; LITENDIAN: st.w [[R3]],
267 ; LITENDIAN: .size v8i16_to_v4i32
269 ; BIGENDIAN: v8i16_to_v4i32:
270 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
271 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
272 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
273 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
274 ; BIGENDIAN: st.w [[R4]],
275 ; BIGENDIAN: .size v8i16_to_v4i32
277 define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
279 %0 = load volatile <8 x i16>* %src
280 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
281 %2 = bitcast <8 x i16> %1 to <4 x float>
282 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
283 store <4 x float> %3, <4 x float>* %dst
287 ; LITENDIAN: v8i16_to_v4f32:
288 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
289 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
290 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
291 ; LITENDIAN: st.w [[R3]],
292 ; LITENDIAN: .size v8i16_to_v4f32
294 ; BIGENDIAN: v8i16_to_v4f32:
295 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
296 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
297 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
298 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
299 ; BIGENDIAN: st.w [[R4]],
300 ; BIGENDIAN: .size v8i16_to_v4f32
302 define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
304 %0 = load volatile <8 x i16>* %src
305 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
306 %2 = bitcast <8 x i16> %1 to <2 x i64>
307 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
308 store <2 x i64> %3, <2 x i64>* %dst
312 ; LITENDIAN: v8i16_to_v2i64:
313 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
314 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
315 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
316 ; LITENDIAN: st.d [[R3]],
317 ; LITENDIAN: .size v8i16_to_v2i64
319 ; BIGENDIAN: v8i16_to_v2i64:
320 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
321 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
322 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
323 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
324 ; BIGENDIAN: st.d [[R4]],
325 ; BIGENDIAN: .size v8i16_to_v2i64
327 define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
329 %0 = load volatile <8 x i16>* %src
330 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
331 %2 = bitcast <8 x i16> %1 to <2 x double>
332 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
333 store <2 x double> %3, <2 x double>* %dst
337 ; LITENDIAN: v8i16_to_v2f64:
338 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
339 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
340 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
341 ; LITENDIAN: st.d [[R3]],
342 ; LITENDIAN: .size v8i16_to_v2f64
344 ; BIGENDIAN: v8i16_to_v2f64:
345 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
346 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
347 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
348 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
349 ; BIGENDIAN: st.d [[R4]],
350 ; BIGENDIAN: .size v8i16_to_v2f64
353 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
354 ; are no operations for v8f16 to put in the way.
355 define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
357 %0 = load volatile <8 x half>* %src
358 %1 = bitcast <8 x half> %0 to <16 x i8>
359 %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
360 store <16 x i8> %2, <16 x i8>* %dst
364 ; LITENDIAN: v8f16_to_v16i8:
365 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
366 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
367 ; LITENDIAN: st.b [[R3]],
368 ; LITENDIAN: .size v8f16_to_v16i8
370 ; BIGENDIAN: v8f16_to_v16i8:
371 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
372 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
373 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
374 ; BIGENDIAN: st.b [[R4]],
375 ; BIGENDIAN: .size v8f16_to_v16i8
377 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
378 ; are no operations for v8f16 to put in the way.
379 define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
381 %0 = load volatile <8 x half>* %src
382 %1 = bitcast <8 x half> %0 to <8 x i16>
383 %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
384 store <8 x i16> %2, <8 x i16>* %dst
388 ; LITENDIAN: v8f16_to_v8i16:
389 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
390 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
391 ; LITENDIAN: st.h [[R2]],
392 ; LITENDIAN: .size v8f16_to_v8i16
394 ; BIGENDIAN: v8f16_to_v8i16:
395 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
396 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
397 ; BIGENDIAN: st.h [[R2]],
398 ; BIGENDIAN: .size v8f16_to_v8i16
400 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
401 ; are no operations for v8f16 to put in the way.
402 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
403 ; are no operations for v8f16 to put in the way.
404 define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
406 %0 = load volatile <8 x half>* %src
407 %1 = bitcast <8 x half> %0 to <8 x half>
408 store <8 x half> %1, <8 x half>* %dst
412 ; LITENDIAN: v8f16_to_v8f16:
413 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
414 ; LITENDIAN: st.h [[R1]],
415 ; LITENDIAN: .size v8f16_to_v8f16
417 ; BIGENDIAN: v8f16_to_v8f16:
418 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
419 ; BIGENDIAN: st.h [[R1]],
420 ; BIGENDIAN: .size v8f16_to_v8f16
422 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
423 ; are no operations for v8f16 to put in the way.
424 define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
426 %0 = load volatile <8 x half>* %src
427 %1 = bitcast <8 x half> %0 to <4 x i32>
428 %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
429 store <4 x i32> %2, <4 x i32>* %dst
433 ; LITENDIAN: v8f16_to_v4i32:
434 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
435 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
436 ; LITENDIAN: st.w [[R2]],
437 ; LITENDIAN: .size v8f16_to_v4i32
439 ; BIGENDIAN: v8f16_to_v4i32:
440 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
441 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
442 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
443 ; BIGENDIAN: st.w [[R3]],
444 ; BIGENDIAN: .size v8f16_to_v4i32
446 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
447 ; are no operations for v8f16 to put in the way.
448 define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
450 %0 = load volatile <8 x half>* %src
451 %1 = bitcast <8 x half> %0 to <4 x float>
452 %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
453 store <4 x float> %2, <4 x float>* %dst
457 ; LITENDIAN: v8f16_to_v4f32:
458 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
459 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
460 ; LITENDIAN: st.w [[R2]],
461 ; LITENDIAN: .size v8f16_to_v4f32
463 ; BIGENDIAN: v8f16_to_v4f32:
464 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
465 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
466 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
467 ; BIGENDIAN: st.w [[R3]],
468 ; BIGENDIAN: .size v8f16_to_v4f32
470 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
471 ; are no operations for v8f16 to put in the way.
472 define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
474 %0 = load volatile <8 x half>* %src
475 %1 = bitcast <8 x half> %0 to <2 x i64>
476 %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
477 store <2 x i64> %2, <2 x i64>* %dst
481 ; LITENDIAN: v8f16_to_v2i64:
482 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
483 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
484 ; LITENDIAN: st.d [[R2]],
485 ; LITENDIAN: .size v8f16_to_v2i64
487 ; BIGENDIAN: v8f16_to_v2i64:
488 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
489 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
490 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
491 ; BIGENDIAN: st.d [[R3]],
492 ; BIGENDIAN: .size v8f16_to_v2i64
494 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
495 ; are no operations for v8f16 to put in the way.
496 define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
498 %0 = load volatile <8 x half>* %src
499 %1 = bitcast <8 x half> %0 to <2 x double>
500 %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
501 store <2 x double> %2, <2 x double>* %dst
505 ; LITENDIAN: v8f16_to_v2f64:
506 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
507 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
508 ; LITENDIAN: st.d [[R2]],
509 ; LITENDIAN: .size v8f16_to_v2f64
511 ; BIGENDIAN: v8f16_to_v2f64:
512 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
513 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
514 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
515 ; BIGENDIAN: st.d [[R3]],
516 ; BIGENDIAN: .size v8f16_to_v2f64
519 define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
521 %0 = load volatile <4 x i32>* %src
522 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
523 %2 = bitcast <4 x i32> %1 to <16 x i8>
524 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
525 store <16 x i8> %3, <16 x i8>* %dst
529 ; LITENDIAN: v4i32_to_v16i8:
530 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
531 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
532 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
533 ; LITENDIAN: st.b [[R3]],
534 ; LITENDIAN: .size v4i32_to_v16i8
536 ; BIGENDIAN: v4i32_to_v16i8:
537 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
538 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
539 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
540 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
541 ; BIGENDIAN: st.b [[R4]],
542 ; BIGENDIAN: .size v4i32_to_v16i8
544 define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
546 %0 = load volatile <4 x i32>* %src
547 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
548 %2 = bitcast <4 x i32> %1 to <8 x i16>
549 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
550 store <8 x i16> %3, <8 x i16>* %dst
554 ; LITENDIAN: v4i32_to_v8i16:
555 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
556 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
557 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
558 ; LITENDIAN: st.h [[R3]],
559 ; LITENDIAN: .size v4i32_to_v8i16
561 ; BIGENDIAN: v4i32_to_v8i16:
562 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
563 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
564 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
565 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
566 ; BIGENDIAN: st.h [[R4]],
567 ; BIGENDIAN: .size v4i32_to_v8i16
569 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
570 ; are no operations for v8f16 to put in the way.
571 define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
573 %0 = load volatile <4 x i32>* %src
574 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
575 %2 = bitcast <4 x i32> %1 to <8 x half>
576 store <8 x half> %2, <8 x half>* %dst
580 ; LITENDIAN: v4i32_to_v8f16:
581 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
582 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
583 ; LITENDIAN: st.w [[R2]],
584 ; LITENDIAN: .size v4i32_to_v8f16
586 ; BIGENDIAN: v4i32_to_v8f16:
587 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
588 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
589 ; BIGENDIAN: st.w [[R2]],
590 ; BIGENDIAN: .size v4i32_to_v8f16
592 define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
594 %0 = load volatile <4 x i32>* %src
595 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
596 %2 = bitcast <4 x i32> %1 to <4 x i32>
597 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
598 store <4 x i32> %3, <4 x i32>* %dst
602 ; LITENDIAN: v4i32_to_v4i32:
603 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
604 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
605 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
606 ; LITENDIAN: st.w [[R3]],
607 ; LITENDIAN: .size v4i32_to_v4i32
609 ; BIGENDIAN: v4i32_to_v4i32:
610 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
611 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
612 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
613 ; BIGENDIAN: st.w [[R3]],
614 ; BIGENDIAN: .size v4i32_to_v4i32
616 define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
618 %0 = load volatile <4 x i32>* %src
619 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
620 %2 = bitcast <4 x i32> %1 to <4 x float>
621 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
622 store <4 x float> %3, <4 x float>* %dst
626 ; LITENDIAN: v4i32_to_v4f32:
627 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
628 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
629 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
630 ; LITENDIAN: st.w [[R3]],
631 ; LITENDIAN: .size v4i32_to_v4f32
633 ; BIGENDIAN: v4i32_to_v4f32:
634 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
635 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
636 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
637 ; BIGENDIAN: st.w [[R3]],
638 ; BIGENDIAN: .size v4i32_to_v4f32
640 define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
642 %0 = load volatile <4 x i32>* %src
643 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
644 %2 = bitcast <4 x i32> %1 to <2 x i64>
645 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
646 store <2 x i64> %3, <2 x i64>* %dst
650 ; LITENDIAN: v4i32_to_v2i64:
651 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
652 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
653 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
654 ; LITENDIAN: st.d [[R3]],
655 ; LITENDIAN: .size v4i32_to_v2i64
657 ; BIGENDIAN: v4i32_to_v2i64:
658 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
659 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
660 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
661 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
662 ; BIGENDIAN: st.d [[R4]],
663 ; BIGENDIAN: .size v4i32_to_v2i64
665 define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
667 %0 = load volatile <4 x i32>* %src
668 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
669 %2 = bitcast <4 x i32> %1 to <2 x double>
670 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
671 store <2 x double> %3, <2 x double>* %dst
675 ; LITENDIAN: v4i32_to_v2f64:
676 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
677 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
678 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
679 ; LITENDIAN: st.d [[R3]],
680 ; LITENDIAN: .size v4i32_to_v2f64
682 ; BIGENDIAN: v4i32_to_v2f64:
683 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
684 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
685 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
686 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
687 ; BIGENDIAN: st.d [[R4]],
688 ; BIGENDIAN: .size v4i32_to_v2f64
690 define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
692 %0 = load volatile <4 x float>* %src
693 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
694 %2 = bitcast <4 x float> %1 to <16 x i8>
695 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
696 store <16 x i8> %3, <16 x i8>* %dst
700 ; LITENDIAN: v4f32_to_v16i8:
701 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
702 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
703 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
704 ; LITENDIAN: st.b [[R3]],
705 ; LITENDIAN: .size v4f32_to_v16i8
707 ; BIGENDIAN: v4f32_to_v16i8:
708 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
709 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
710 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
711 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
712 ; BIGENDIAN: st.b [[R4]],
713 ; BIGENDIAN: .size v4f32_to_v16i8
715 define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
717 %0 = load volatile <4 x float>* %src
718 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
719 %2 = bitcast <4 x float> %1 to <8 x i16>
720 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
721 store <8 x i16> %3, <8 x i16>* %dst
725 ; LITENDIAN: v4f32_to_v8i16:
726 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
727 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
728 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
729 ; LITENDIAN: st.h [[R3]],
730 ; LITENDIAN: .size v4f32_to_v8i16
732 ; BIGENDIAN: v4f32_to_v8i16:
733 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
734 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
735 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
736 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
737 ; BIGENDIAN: st.h [[R4]],
738 ; BIGENDIAN: .size v4f32_to_v8i16
740 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
741 ; are no operations for v8f16 to put in the way.
742 define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
744 %0 = load volatile <4 x float>* %src
745 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
746 %2 = bitcast <4 x float> %1 to <8 x half>
747 store <8 x half> %2, <8 x half>* %dst
751 ; LITENDIAN: v4f32_to_v8f16:
752 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
753 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
754 ; LITENDIAN: st.w [[R2]],
755 ; LITENDIAN: .size v4f32_to_v8f16
757 ; BIGENDIAN: v4f32_to_v8f16:
758 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
759 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
760 ; BIGENDIAN: st.w [[R2]],
761 ; BIGENDIAN: .size v4f32_to_v8f16
763 define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
765 %0 = load volatile <4 x float>* %src
766 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
767 %2 = bitcast <4 x float> %1 to <4 x i32>
768 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
769 store <4 x i32> %3, <4 x i32>* %dst
773 ; LITENDIAN: v4f32_to_v4i32:
774 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
775 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
776 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
777 ; LITENDIAN: st.w [[R3]],
778 ; LITENDIAN: .size v4f32_to_v4i32
780 ; BIGENDIAN: v4f32_to_v4i32:
781 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
782 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
783 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
784 ; BIGENDIAN: st.w [[R3]],
785 ; BIGENDIAN: .size v4f32_to_v4i32
787 define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
789 %0 = load volatile <4 x float>* %src
790 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
791 %2 = bitcast <4 x float> %1 to <4 x float>
792 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
793 store <4 x float> %3, <4 x float>* %dst
797 ; LITENDIAN: v4f32_to_v4f32:
798 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
799 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
800 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
801 ; LITENDIAN: st.w [[R3]],
802 ; LITENDIAN: .size v4f32_to_v4f32
804 ; BIGENDIAN: v4f32_to_v4f32:
805 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
806 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
807 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
808 ; BIGENDIAN: st.w [[R3]],
809 ; BIGENDIAN: .size v4f32_to_v4f32
811 define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
813 %0 = load volatile <4 x float>* %src
814 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
815 %2 = bitcast <4 x float> %1 to <2 x i64>
816 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
817 store <2 x i64> %3, <2 x i64>* %dst
821 ; LITENDIAN: v4f32_to_v2i64:
822 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
823 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
824 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
825 ; LITENDIAN: st.d [[R3]],
826 ; LITENDIAN: .size v4f32_to_v2i64
828 ; BIGENDIAN: v4f32_to_v2i64:
829 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
830 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
831 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
832 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
833 ; BIGENDIAN: st.d [[R4]],
834 ; BIGENDIAN: .size v4f32_to_v2i64
836 define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
838 %0 = load volatile <4 x float>* %src
839 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
840 %2 = bitcast <4 x float> %1 to <2 x double>
841 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
842 store <2 x double> %3, <2 x double>* %dst
846 ; LITENDIAN: v4f32_to_v2f64:
847 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
848 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
849 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
850 ; LITENDIAN: st.d [[R3]],
851 ; LITENDIAN: .size v4f32_to_v2f64
853 ; BIGENDIAN: v4f32_to_v2f64:
854 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
855 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
856 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
857 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
858 ; BIGENDIAN: st.d [[R4]],
859 ; BIGENDIAN: .size v4f32_to_v2f64
; v2i64 -> v16i8 bitcast. Element size shrinks 64 -> 8 bits, so on big-endian
; two shuffles are expected (shf.b 27 then shf.w 177) to reverse the byte
; order within each 64-bit element; little-endian needs no correction.
861 define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
863 %0 = load volatile <2 x i64>* %src
864 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
865 %2 = bitcast <2 x i64> %1 to <16 x i8>
866 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
867 store <16 x i8> %3, <16 x i8>* %dst
871 ; LITENDIAN: v2i64_to_v16i8:
872 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
873 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
874 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
875 ; LITENDIAN: st.b [[R3]],
876 ; LITENDIAN: .size v2i64_to_v16i8
878 ; BIGENDIAN: v2i64_to_v16i8:
879 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
880 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
881 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
882 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
883 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
884 ; BIGENDIAN: st.b [[R4]],
885 ; BIGENDIAN: .size v2i64_to_v16i8
; v2i64 -> v8i16 bitcast. On big-endian a single shf.h with control 27 is
; expected to reverse the halfword order within each 64-bit element;
; little-endian needs no correction.
887 define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
889 %0 = load volatile <2 x i64>* %src
890 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
891 %2 = bitcast <2 x i64> %1 to <8 x i16>
892 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
893 store <8 x i16> %3, <8 x i16>* %dst
897 ; LITENDIAN: v2i64_to_v8i16:
898 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
899 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
900 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
901 ; LITENDIAN: st.h [[R3]],
902 ; LITENDIAN: .size v2i64_to_v8i16
904 ; BIGENDIAN: v2i64_to_v8i16:
905 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
906 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
907 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
908 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
909 ; BIGENDIAN: st.h [[R4]],
910 ; BIGENDIAN: .size v2i64_to_v8i16
912 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
913 ; are no operations for v8f16 to put in the way.
; Consequently both endiannesses are expected to store the addv.d result
; directly with st.d — no element reordering appears on either path.
914 define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
916 %0 = load volatile <2 x i64>* %src
917 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
918 %2 = bitcast <2 x i64> %1 to <8 x half>
919 store <8 x half> %2, <8 x half>* %dst
923 ; LITENDIAN: v2i64_to_v8f16:
924 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
925 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
926 ; LITENDIAN: st.d [[R2]],
927 ; LITENDIAN: .size v2i64_to_v8f16
929 ; BIGENDIAN: v2i64_to_v8f16:
930 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
931 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
932 ; BIGENDIAN: st.d [[R2]],
933 ; BIGENDIAN: .size v2i64_to_v8f16
; v2i64 -> v4i32 bitcast. On big-endian a shf.w with control 177 is expected
; to swap the two words inside each doubleword; little-endian needs no
; correction.
935 define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
937 %0 = load volatile <2 x i64>* %src
938 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
939 %2 = bitcast <2 x i64> %1 to <4 x i32>
940 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
941 store <4 x i32> %3, <4 x i32>* %dst
945 ; LITENDIAN: v2i64_to_v4i32:
946 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
947 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
948 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
949 ; LITENDIAN: st.w [[R3]],
950 ; LITENDIAN: .size v2i64_to_v4i32
952 ; BIGENDIAN: v2i64_to_v4i32:
953 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
954 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
955 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
956 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
957 ; BIGENDIAN: st.w [[R4]],
958 ; BIGENDIAN: .size v2i64_to_v4i32
; v2i64 -> v4f32 bitcast. Same layout change as v2i64 -> v4i32: big-endian is
; expected to insert shf.w 177 before the word-sized FP operation;
; little-endian needs no correction.
960 define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
962 %0 = load volatile <2 x i64>* %src
963 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
964 %2 = bitcast <2 x i64> %1 to <4 x float>
965 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
966 store <4 x float> %3, <4 x float>* %dst
970 ; LITENDIAN: v2i64_to_v4f32:
971 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
972 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
973 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
974 ; LITENDIAN: st.w [[R3]],
975 ; LITENDIAN: .size v2i64_to_v4f32
977 ; BIGENDIAN: v2i64_to_v4f32:
978 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
979 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
980 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
981 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
982 ; BIGENDIAN: st.w [[R4]],
983 ; BIGENDIAN: .size v2i64_to_v4f32
; v2i64 -> v2i64 identity bitcast. Element size is unchanged, so no shuffle
; is expected on either endianness.
985 define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
987 %0 = load volatile <2 x i64>* %src
988 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
989 %2 = bitcast <2 x i64> %1 to <2 x i64>
990 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
991 store <2 x i64> %3, <2 x i64>* %dst
995 ; LITENDIAN: v2i64_to_v2i64:
996 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
997 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
998 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
999 ; LITENDIAN: st.d [[R3]],
1000 ; LITENDIAN: .size v2i64_to_v2i64
1002 ; BIGENDIAN: v2i64_to_v2i64:
1003 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1004 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1005 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1006 ; BIGENDIAN: st.d [[R3]],
1007 ; BIGENDIAN: .size v2i64_to_v2i64
; v2i64 -> v2f64 bitcast. Both element types are 64-bit, so no shuffle is
; expected on either endianness.
1009 define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
1011 %0 = load volatile <2 x i64>* %src
1012 %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
1013 %2 = bitcast <2 x i64> %1 to <2 x double>
1014 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
1015 store <2 x double> %3, <2 x double>* %dst
1019 ; LITENDIAN: v2i64_to_v2f64:
1020 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1021 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1022 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1023 ; LITENDIAN: st.d [[R3]],
1024 ; LITENDIAN: .size v2i64_to_v2f64
1026 ; BIGENDIAN: v2i64_to_v2f64:
1027 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1028 ; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1029 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1030 ; BIGENDIAN: st.d [[R3]],
1031 ; BIGENDIAN: .size v2i64_to_v2f64
; v2f64 -> v16i8 bitcast. As with v2i64 -> v16i8, big-endian is expected to
; emit shf.b 27 followed by shf.w 177 to reverse byte order within each
; 64-bit element; little-endian needs no correction.
1033 define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
1035 %0 = load volatile <2 x double>* %src
1036 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1037 %2 = bitcast <2 x double> %1 to <16 x i8>
1038 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
1039 store <16 x i8> %3, <16 x i8>* %dst
1043 ; LITENDIAN: v2f64_to_v16i8:
1044 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1045 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1046 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1047 ; LITENDIAN: st.b [[R3]],
1048 ; LITENDIAN: .size v2f64_to_v16i8
1050 ; BIGENDIAN: v2f64_to_v16i8:
1051 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1052 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1053 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
1054 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
1055 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
1056 ; BIGENDIAN: st.b [[R4]],
1057 ; BIGENDIAN: .size v2f64_to_v16i8
; v2f64 -> v8i16 bitcast. Big-endian is expected to emit a single shf.h 27 to
; reverse halfword order within each 64-bit element; little-endian needs no
; correction.
1059 define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
1061 %0 = load volatile <2 x double>* %src
1062 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1063 %2 = bitcast <2 x double> %1 to <8 x i16>
1064 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
1065 store <8 x i16> %3, <8 x i16>* %dst
1069 ; LITENDIAN: v2f64_to_v8i16:
1070 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1071 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1072 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1073 ; LITENDIAN: st.h [[R3]],
1074 ; LITENDIAN: .size v2f64_to_v8i16
1076 ; BIGENDIAN: v2f64_to_v8i16:
1077 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1078 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1079 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
1080 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
1081 ; BIGENDIAN: st.h [[R4]],
1082 ; BIGENDIAN: .size v2f64_to_v8i16
1084 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
1085 ; are no operations for v8f16 to put in the way.
; As in v2i64_to_v8f16, both endiannesses are expected to store the fadd.d
; result directly with st.d — no reordering appears on either path.
1086 define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
1088 %0 = load volatile <2 x double>* %src
1089 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1090 %2 = bitcast <2 x double> %1 to <8 x half>
1091 store <8 x half> %2, <8 x half>* %dst
1095 ; LITENDIAN: v2f64_to_v8f16:
1096 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1097 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1098 ; LITENDIAN: st.d [[R2]],
1099 ; LITENDIAN: .size v2f64_to_v8f16
1101 ; BIGENDIAN: v2f64_to_v8f16:
1102 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1103 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1104 ; BIGENDIAN: st.d [[R2]],
1105 ; BIGENDIAN: .size v2f64_to_v8f16
; v2f64 -> v4i32 bitcast. Big-endian is expected to emit shf.w 177 to swap
; the two words in each doubleword; little-endian needs no correction.
1107 define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
1109 %0 = load volatile <2 x double>* %src
1110 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1111 %2 = bitcast <2 x double> %1 to <4 x i32>
1112 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
1113 store <4 x i32> %3, <4 x i32>* %dst
1117 ; LITENDIAN: v2f64_to_v4i32:
1118 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1119 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1120 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1121 ; LITENDIAN: st.w [[R3]],
1122 ; LITENDIAN: .size v2f64_to_v4i32
1124 ; BIGENDIAN: v2f64_to_v4i32:
1125 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1126 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1127 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
1128 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
1129 ; BIGENDIAN: st.w [[R4]],
1130 ; BIGENDIAN: .size v2f64_to_v4i32
; v2f64 -> v4f32 bitcast. Same layout change as v2f64 -> v4i32: big-endian is
; expected to insert shf.w 177 before the word-sized FP operation;
; little-endian needs no correction.
1132 define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
1134 %0 = load volatile <2 x double>* %src
1135 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1136 %2 = bitcast <2 x double> %1 to <4 x float>
1137 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
1138 store <4 x float> %3, <4 x float>* %dst
1142 ; LITENDIAN: v2f64_to_v4f32:
1143 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1144 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1145 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1146 ; LITENDIAN: st.w [[R3]],
1147 ; LITENDIAN: .size v2f64_to_v4f32
1149 ; BIGENDIAN: v2f64_to_v4f32:
1150 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1151 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1152 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
1153 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
1154 ; BIGENDIAN: st.w [[R4]],
1155 ; BIGENDIAN: .size v2f64_to_v4f32
; v2f64 -> v2i64 bitcast. Both element types are 64-bit, so no shuffle is
; expected on either endianness.
1157 define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
1159 %0 = load volatile <2 x double>* %src
1160 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1161 %2 = bitcast <2 x double> %1 to <2 x i64>
1162 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
1163 store <2 x i64> %3, <2 x i64>* %dst
1167 ; LITENDIAN: v2f64_to_v2i64:
1168 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1169 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1170 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1171 ; LITENDIAN: st.d [[R3]],
1172 ; LITENDIAN: .size v2f64_to_v2i64
1174 ; BIGENDIAN: v2f64_to_v2i64:
1175 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1176 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1177 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1178 ; BIGENDIAN: st.d [[R3]],
1179 ; BIGENDIAN: .size v2f64_to_v2i64
; v2f64 -> v2f64 identity bitcast. Element size is unchanged, so no shuffle
; is expected on either endianness.
1181 define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
1183 %0 = load volatile <2 x double>* %src
1184 %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
1185 %2 = bitcast <2 x double> %1 to <2 x double>
1186 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
1187 store <2 x double> %3, <2 x double>* %dst
1191 ; LITENDIAN: v2f64_to_v2f64:
1192 ; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
1193 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1194 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1195 ; LITENDIAN: st.d [[R3]],
1196 ; LITENDIAN: .size v2f64_to_v2f64
1198 ; BIGENDIAN: v2f64_to_v2f64:
1199 ; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
1200 ; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
1201 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1202 ; BIGENDIAN: st.d [[R3]],
1203 ; BIGENDIAN: .size v2f64_to_v2f64
; MSA intrinsic declarations. Each test above wraps its bitcast between two of
; these element-typed adds so the compiler cannot fold the cast into the
; load/store (except in the v8f16 cases, which have no MSA operations).
1205 declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
1206 declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
1207 declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
1208 declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
1209 declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
1210 declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind