1 ; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
2 ; Reverses the 16 bytes of %a (mask 15..0; second shuffle operand is undef) and stores the result to %c.
3 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
4 ; CHECK: vshf_v16i8_0:
5 ; The label check above anchors the DAG checks below to this function, as every sibling test does.
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
9 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
10 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
11 store <16 x i8> %2, <16 x i8>* %c
12 ; CHECK-DAG: st.b [[R3]], 0($4)
15 ; CHECK: .size vshf_v16i8_0
17 ; Broadcasts byte lane 1 of %a to every lane (all mask indices are 1) and stores the result to %c.
18 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
19 ; CHECK: vshf_v16i8_1:
20 ; Expected code: the index vector comes from an ldi.b immediate of 1 instead of a memory load, then feeds vshf.b.
21 %1 = load <16 x i8>* %a
22 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
23 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
24 ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
25 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
26 store <16 x i8> %2, <16 x i8>* %c
27 ; CHECK-DAG: st.b [[R3]], 0($4)
30 ; CHECK: .size vshf_v16i8_1
32 ; Every mask index is 16 or above, so only the second operand (%b) contributes: its lanes 0..14 followed by lane 0 again.
33 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
34 ; CHECK: vshf_v16i8_2:
35 ; Expected code: only the load from 0($6) is checked, and that register is used as both vshf.b sources.
36 %1 = load <16 x i8>* %a
37 %2 = load <16 x i8>* %b
38 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
39 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
40 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
41 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
42 store <16 x i8> %3, <16 x i8>* %c
43 ; CHECK-DAG: st.b [[R3]], 0($4)
46 ; CHECK: .size vshf_v16i8_2
48 ; Mixes lanes from both inputs: mask indices below 16 pick from %a, indices 16 and above pick from %b.
49 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
50 ; CHECK: vshf_v16i8_3:
51 ; Expected code: both inputs are loaded, the mask is loaded through a %lo address, and vshf.b takes the 0($5) register then the 0($6) register.
52 %1 = load <16 x i8>* %a
53 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
54 %2 = load <16 x i8>* %b
55 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
56 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
57 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
58 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
59 store <16 x i8> %3, <16 x i8>* %c
60 ; CHECK-DAG: st.b [[R3]], 0($4)
63 ; CHECK: .size vshf_v16i8_3
65 ; Both shuffle operands are %1, so mask indices 1 and 17 name the same lane; the result is lane 1 broadcast.
66 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
67 ; CHECK: vshf_v16i8_4:
68 ; Expected code: same shape as a plain splat of lane 1 (ldi.b immediate 1 feeding vshf.b with one source register).
69 %1 = load <16 x i8>* %a
70 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
71 %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
72 ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
73 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
74 store <16 x i8> %2, <16 x i8>* %c
75 ; CHECK-DAG: st.b [[R3]], 0($4)
78 ; CHECK: .size vshf_v16i8_4
80 ; Reverses the eight halfwords of %a (mask 7..0; second shuffle operand is undef) and stores the result to %c.
81 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
82 ; CHECK: vshf_v8i16_0:
83 ; Expected code: the mask is loaded through a %lo address (presumably a constant-pool entry); that register is the first vshf.h operand and the one stored.
84 %1 = load <8 x i16>* %a
85 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
86 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
87 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
88 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
89 store <8 x i16> %2, <8 x i16>* %c
90 ; CHECK-DAG: st.h [[R3]], 0($4)
93 ; CHECK: .size vshf_v8i16_0
95 ; Broadcasts halfword lane 1 of %a to every lane (all mask indices are 1) and stores the result to %c.
96 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
97 ; CHECK: vshf_v8i16_1:
98 ; Expected code: the index vector comes from an ldi.h immediate of 1 instead of a memory load, then feeds vshf.h.
99 %1 = load <8 x i16>* %a
100 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
101 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
102 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
103 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
104 store <8 x i16> %2, <8 x i16>* %c
105 ; CHECK-DAG: st.h [[R3]], 0($4)
108 ; CHECK: .size vshf_v8i16_1
110 ; Every mask index is 8 or above, so only the second operand (%b) contributes: its lanes 0..6 followed by lane 0 again.
111 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
112 ; CHECK: vshf_v8i16_2:
113 ; Expected code: only the load from 0($6) is checked, and that register is used as both vshf.h sources.
114 %1 = load <8 x i16>* %a
115 %2 = load <8 x i16>* %b
116 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
117 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
118 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
119 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
120 store <8 x i16> %3, <8 x i16>* %c
121 ; CHECK-DAG: st.h [[R3]], 0($4)
124 ; CHECK: .size vshf_v8i16_2
126 ; Mixes lanes from both inputs: mask indices below 8 pick from %a, indices 8 and above pick from %b.
127 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
128 ; CHECK: vshf_v8i16_3:
129 ; Expected code: both inputs are loaded, the mask is loaded through a %lo address, and vshf.h takes the 0($5) register then the 0($6) register.
130 %1 = load <8 x i16>* %a
131 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
132 %2 = load <8 x i16>* %b
133 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
134 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
135 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
136 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
137 store <8 x i16> %3, <8 x i16>* %c
138 ; CHECK-DAG: st.h [[R3]], 0($4)
141 ; CHECK: .size vshf_v8i16_3
143 ; Both shuffle operands are %1, so mask indices 1 and 9 name the same lane; the result is lane 1 broadcast.
144 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
145 ; CHECK: vshf_v8i16_4:
146 ; Expected code: same shape as a plain splat of lane 1 (ldi.h immediate 1 feeding vshf.h with one source register).
147 %1 = load <8 x i16>* %a
148 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
149 %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
150 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
151 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
152 store <8 x i16> %2, <8 x i16>* %c
153 ; CHECK-DAG: st.h [[R3]], 0($4)
156 ; CHECK: .size vshf_v8i16_4
158 ; Reverses the four words of %a (mask 3..0; second shuffle operand is undef) and stores the result to %c.
159 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160 ; CHECK: vshf_v4i32_0:
161 ; vshf.w must reuse the register that received the mask (R3), as in the .b/.h/.d variants; re-capturing R3 there would leave the mask load untied.
162 %1 = load <4 x i32>* %a
163 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
165 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
166 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R1]]
167 store <4 x i32> %2, <4 x i32>* %c
168 ; CHECK-DAG: st.w [[R3]], 0($4)
171 ; CHECK: .size vshf_v4i32_0
173 ; Broadcasts word lane 1 of %a to every lane (all mask indices are 1) and stores the result to %c.
174 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
175 ; CHECK: vshf_v4i32_1:
176 ; vshf.w must reuse the register written by ldi.w (R3), as in the .b/.h/.d variants; re-capturing R3 there would leave the ldi.w result untied.
177 %1 = load <4 x i32>* %a
178 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
179 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
180 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
181 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R1]]
182 store <4 x i32> %2, <4 x i32>* %c
183 ; CHECK-DAG: st.w [[R3]], 0($4)
186 ; CHECK: .size vshf_v4i32_1
188 ; Every mask index is 4 or above, so only the second operand (%b) contributes: its lanes 0..2 followed by lane 0 again.
189 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
190 ; CHECK: vshf_v4i32_2:
191 ; vshf.w must reuse the register that received the mask (R3), as in the .b/.h/.d variants; re-capturing R3 there would leave the mask load untied.
192 %1 = load <4 x i32>* %a
193 %2 = load <4 x i32>* %b
194 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
195 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
196 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
197 ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R2]]
198 store <4 x i32> %3, <4 x i32>* %c
199 ; CHECK-DAG: st.w [[R3]], 0($4)
202 ; CHECK: .size vshf_v4i32_2
204 ; Mixes lanes from both inputs: mask index 1 picks from %a, indices 5, 6 and 4 pick from %b.
205 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
206 ; CHECK: vshf_v4i32_3:
207 ; Expected code: both inputs are loaded, the mask is loaded through a %lo address, and vshf.w takes the 0($5) register then the 0($6) register.
208 %1 = load <4 x i32>* %a
209 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
210 %2 = load <4 x i32>* %b
211 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
212 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
213 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
214 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
215 store <4 x i32> %3, <4 x i32>* %c
216 ; CHECK-DAG: st.w [[R3]], 0($4)
219 ; CHECK: .size vshf_v4i32_3
221 ; Both shuffle operands are %1, so mask indices 1 and 5 name the same lane; the result is lane 1 broadcast.
222 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
223 ; CHECK: vshf_v4i32_4:
224 ; vshf.w must reuse the register written by ldi.w (R3), as in the .b/.h/.d variants; re-capturing R3 there would leave the ldi.w result untied.
225 %1 = load <4 x i32>* %a
226 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
227 %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
228 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
229 ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R1]]
230 store <4 x i32> %2, <4 x i32>* %c
231 ; CHECK-DAG: st.w [[R3]], 0($4)
234 ; CHECK: .size vshf_v4i32_4
236 ; Swaps the two doublewords of %a (mask 1, 0; second shuffle operand is undef) and stores the result to %c.
237 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
238 ; CHECK: vshf_v2i64_0:
239 ; Expected code: the mask is loaded through a %lo address (presumably a constant-pool entry); that register is the first vshf.d operand and the one stored.
240 %1 = load <2 x i64>* %a
241 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
242 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
243 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
244 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
245 store <2 x i64> %2, <2 x i64>* %c
246 ; CHECK-DAG: st.d [[R3]], 0($4)
249 ; CHECK: .size vshf_v2i64_0
251 ; Broadcasts doubleword lane 1 of %a to both lanes (all mask indices are 1) and stores the result to %c.
252 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
253 ; CHECK: vshf_v2i64_1:
254 ; Expected code: the index vector comes from an ldi.d immediate of 1 instead of a memory load, then feeds vshf.d.
255 %1 = load <2 x i64>* %a
256 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
257 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
258 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
259 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
260 store <2 x i64> %2, <2 x i64>* %c
261 ; CHECK-DAG: st.d [[R3]], 0($4)
264 ; CHECK: .size vshf_v2i64_1
266 ; Both mask indices are 2 or above, so only the second operand (%b) contributes: its lane 1 followed by its lane 0.
267 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
268 ; CHECK: vshf_v2i64_2:
269 ; Expected code: only the load from 0($6) is checked, and that register is used as both vshf.d sources.
270 %1 = load <2 x i64>* %a
271 %2 = load <2 x i64>* %b
272 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
273 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
274 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
275 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
276 store <2 x i64> %3, <2 x i64>* %c
277 ; CHECK-DAG: st.d [[R3]], 0($4)
280 ; CHECK: .size vshf_v2i64_2
282 ; Mixes lanes from both inputs: mask index 1 picks lane 1 of %a, index 2 picks lane 0 of %b.
283 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
284 ; CHECK: vshf_v2i64_3:
285 ; Expected code: both inputs are loaded, the mask is loaded through a %lo address, and vshf.d takes the 0($5) register then the 0($6) register.
286 %1 = load <2 x i64>* %a
287 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
288 %2 = load <2 x i64>* %b
289 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
290 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
291 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
292 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
293 store <2 x i64> %3, <2 x i64>* %c
294 ; CHECK-DAG: st.d [[R3]], 0($4)
297 ; CHECK: .size vshf_v2i64_3
299 ; Both shuffle operands are %1, so mask indices 1 and 3 name the same lane; the result is lane 1 broadcast.
300 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
301 ; CHECK: vshf_v2i64_4:
302 ; Expected code: same shape as a plain splat of lane 1 (ldi.d immediate 1 feeding vshf.d with one source register).
303 %1 = load <2 x i64>* %a
304 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
305 %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
306 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
307 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
308 store <2 x i64> %2, <2 x i64>* %c
309 ; CHECK-DAG: st.d [[R3]], 0($4)
312 ; CHECK: .size vshf_v2i64_4