; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; Post-indexed LD2R, immediate writeback.
; Lane 0 of a two-vector vld2lane is splatted across both <16 x i8> results.
; The pointer stored to %ptr is %a + 2 i8 elements (2 bytes), which is the
; writeback immediate the expected instruction carries.
define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) {
; CHECK-LABEL: test_vld2q_dup_fx_update:
; CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}], #2
  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
  %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer
  %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0
  %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr i8* %a, i32 2
  store i8* %tmp1, i8** %ptr
  ret { [2 x <16 x i8>] } %7
}
; Post-indexed LD2R, register writeback.
; Lane 0 of a two-vector vld2lane is splatted across both <4 x i32> results.
; The pointer stored to %ptr is %a advanced by %inc i32 elements, so the
; expected instruction takes its increment from a register.
define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_reg_update:
; CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = bitcast i32* %a to i8*
  %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer
  %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0
  %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 0, 1
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i32* %a, i32 %inc
  store i32* %tmp1, i32** %ptr
  ret { [2 x <4 x i32>] } %8
}
; Post-indexed LD3R, immediate writeback.
; Lane 0 of a three-vector vld3lane is splatted across all <4 x i16> results.
; The pointer stored to %ptr is %a + 3 i16 elements (6 bytes), matching the
; #6 writeback immediate in the expected instruction.
define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) {
; CHECK-LABEL: test_vld3_dup_fx_update:
; CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}], #6
  %1 = bitcast i16* %a to i8*
  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
  %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
  %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer
  %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
  %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer
  %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0
  %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1
  %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr i16* %a, i32 3
  store i16* %tmp1, i16** %ptr
  ret { [3 x <4 x i16>] } %11
}
; Post-indexed LD3R, register writeback.
; Lane 0 of a three-vector vld3lane is splatted across all <8 x i8> results.
; The pointer stored to %ptr is %a advanced by %inc i8 elements, so the
; expected instruction takes its increment from a register.
define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_reg_update:
; CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
  %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
  %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer
  %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
  %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer
  %8 = insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0
  %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1
  %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i8* %a, i32 %inc
  store i8* %tmp1, i8** %ptr
  ret { [3 x <8 x i8>] } %10
}
; Post-indexed LD4R, immediate writeback.
; Lane 0 of a four-vector vld4lane is splatted across all <2 x i32> results.
; The pointer stored to %ptr is %a + 4 i32 elements (16 bytes), matching the
; #16 writeback immediate in the expected instruction.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm whether it is defined elsewhere or can be dropped.
define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) #0 {
; CHECK-LABEL: test_vld4_dup_fx_update:
; CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #16
  %1 = bitcast i32* %a to i8*
  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
  %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
  %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer
  %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
  %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer
  %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
  %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer
  %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0
  %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1
  %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2
  %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr i32* %a, i32 4
  store i32* %tmp1, i32** %ptr
  ret { [4 x <2 x i32>] } %14
}
; Post-indexed LD4R, register writeback.
; Lane 0 of a four-vector vld4lane is splatted across all <2 x double>
; results. The pointer stored to %ptr is %a advanced by %inc double elements,
; so the expected instruction takes its increment from a register.
define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_reg_update:
; CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = bitcast double* %a to i8*
  %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
  ; Broadcast element 0 of each loaded vector (all-zero shuffle mask).
  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
  %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
  %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer
  %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
  %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer
  %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
  %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer
  %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0
  %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1
  %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2
  %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr double* %a, i32 %inc
  store double* %tmp1, double** %ptr
  ret { [4 x <2 x double>] } %14
}
; Post-indexed LD2 (single lane), immediate writeback.
; Loads lane 7 of the two <8 x i8> vectors passed in %b. The pointer stored
; to %ptr is %a + 2 i8 elements (2 bytes), matching the #2 writeback
; immediate in the expected instruction.
define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) {
; CHECK-LABEL: test_vld2_lane_fx_update:
; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}], #2
  %1 = extractvalue [2 x <8 x i8>] %b, 0
  %2 = extractvalue [2 x <8 x i8>] %b, 1
  %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
  ; Repack the two result vectors into the array-in-struct return type.
  %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
  %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
  %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
  %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr i8* %a, i32 2
  store i8* %tmp1, i8** %ptr
  ret { [2 x <8 x i8>] } %7
}
; Post-indexed LD2 (single lane), register writeback.
; Loads lane 6 of the two <8 x i8> vectors passed in %b. The pointer stored
; to %ptr is %a advanced by %inc i8 elements, so the expected instruction
; takes its increment from a register.
define { [2 x <8 x i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld2_lane_reg_update:
; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[6], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [2 x <8 x i8>] %b, 0
  %2 = extractvalue [2 x <8 x i8>] %b, 1
  %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1)
  ; Repack the two result vectors into the array-in-struct return type.
  %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
  %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
  %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
  %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i8* %a, i32 %inc
  store i8* %tmp1, i8** %ptr
  ret { [2 x <8 x i8>] } %7
}
; Post-indexed LD3 (single lane), immediate writeback.
; Loads lane 1 of the three <2 x float> vectors passed in %b. The pointer
; stored to %ptr is %a + 3 float elements (12 bytes), matching the #12
; writeback immediate in the expected instruction.
define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) {
; CHECK-LABEL: test_vld3_lane_fx_update:
; CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], #12
  %1 = extractvalue [3 x <2 x float>] %b, 0
  %2 = extractvalue [3 x <2 x float>] %b, 1
  %3 = extractvalue [3 x <2 x float>] %b, 2
  %4 = bitcast float* %a to i8*
  %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4)
  ; Repack the three result vectors into the array-in-struct return type.
  %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0
  %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1
  %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2
  %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0
  %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1
  %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr float* %a, i32 3
  store float* %tmp1, float** %ptr
  ret { [3 x <2 x float>] } %11
}
; Post-indexed LD3 (single lane), register writeback.
; Loads lane 3 of the three <4 x i16> vectors passed in %b. The pointer
; stored to %ptr is %a advanced by %inc i16 elements, so the expected
; instruction takes its increment from a register.
define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld3_lane_reg_update:
; CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [3 x <4 x i16>] %b, 0
  %2 = extractvalue [3 x <4 x i16>] %b, 1
  %3 = extractvalue [3 x <4 x i16>] %b, 2
  %4 = bitcast i16* %a to i8*
  %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
  ; Repack the three result vectors into the array-in-struct return type.
  %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0
  %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1
  %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2
  %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0
  %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1
  %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i16* %a, i32 %inc
  store i16* %tmp1, i16** %ptr
  ret { [3 x <4 x i16>] } %11
}
; Post-indexed LD4 (single lane), immediate writeback.
; Loads lane 1 of the four <2 x i32> vectors passed in %b. The pointer stored
; to %ptr is %a + 4 i32 elements (16 bytes), matching the #16 writeback
; immediate in the expected instruction.
define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) {
; CHECK-LABEL: test_vld4_lane_fx_update:
; CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], #16
  %1 = extractvalue [4 x <2 x i32>] %b, 0
  %2 = extractvalue [4 x <2 x i32>] %b, 1
  %3 = extractvalue [4 x <2 x i32>] %b, 2
  %4 = extractvalue [4 x <2 x i32>] %b, 3
  %5 = bitcast i32* %a to i8*
  %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4)
  ; Repack the four result vectors into the array-in-struct return type.
  %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0
  %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1
  %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2
  %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3
  %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0
  %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1
  %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2
  %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
  ; Pointer update the test expects to be folded into the load as writeback.
  %tmp1 = getelementptr i32* %a, i32 4
  store i32* %tmp1, i32** %ptr
  ret { [4 x <2 x i32>] } %14
}
; Post-indexed LD4 (single lane), register writeback.
; Loads lane 1 of the four <2 x double> vectors passed in %b. The pointer
; stored to %ptr is %a advanced by %inc double elements, so the expected
; instruction takes its increment from a register.
define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) {
; CHECK-LABEL: test_vld4_lane_reg_update:
; CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [4 x <2 x double>] %b, 0
  %2 = extractvalue [4 x <2 x double>] %b, 1
  %3 = extractvalue [4 x <2 x double>] %b, 2
  %4 = extractvalue [4 x <2 x double>] %b, 3
  %5 = bitcast double* %a to i8*
  %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
  ; Repack the four result vectors into the array-in-struct return type.
  %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0
  %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1
  %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2
  %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3
  %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0
  %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1
  %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2
  %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr double* %a, i32 %inc
  store double* %tmp1, double** %ptr
  ret { [4 x <2 x double>] } %14
}
; Post-indexed ST2 (single lane), immediate writeback.
; Stores lane 7 of the two <8 x i8> vectors passed in %b. The pointer stored
; to %ptr is %a + 2 i8 elements (2 bytes), matching the #2 writeback
; immediate in the expected instruction.
define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) {
; CHECK-LABEL: test_vst2_lane_fx_update:
; CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}], #2
  %1 = extractvalue [2 x <8 x i8>] %b, 0
  %2 = extractvalue [2 x <8 x i8>] %b, 1
  call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
  ; Pointer update the test expects to be folded into the store as writeback.
  %tmp1 = getelementptr i8* %a, i32 2
  store i8* %tmp1, i8** %ptr
  ret void
}
; Post-indexed ST2 (single lane), register writeback.
; Stores lane 1 of the two <2 x i32> vectors passed in %b.coerce. The pointer
; stored to %ptr is %a advanced by %inc i32 elements, so the expected
; instruction takes its increment from a register.
define void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) {
; CHECK-LABEL: test_vst2_lane_reg_update:
; CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0
  %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1
  %3 = bitcast i32* %a to i8*
  tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i32* %a, i32 %inc
  store i32* %tmp1, i32** %ptr
  ret void
}
; Post-indexed ST3 (single lane), immediate writeback.
; Stores lane 3 of the three <4 x float> vectors passed in %b. The pointer
; stored to %ptr is %a + 3 float elements (12 bytes), matching the #12
; writeback immediate in the expected instruction.
define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) {
; CHECK-LABEL: test_vst3_lane_fx_update:
; CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}], #12
  %1 = extractvalue [3 x <4 x float>] %b, 0
  %2 = extractvalue [3 x <4 x float>] %b, 1
  %3 = extractvalue [3 x <4 x float>] %b, 2
  %4 = bitcast float* %a to i8*
  call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4)
  ; Pointer update the test expects to be folded into the store as writeback.
  %tmp1 = getelementptr float* %a, i32 3
  store float* %tmp1, float** %ptr
  ret void
}
; Post-indexed ST3 (single lane), register writeback.
; Stores lane 3 of the three <4 x i16> vectors passed in %b. The pointer
; stored to %ptr is %a advanced by %inc i16 elements, so the expected
; instruction takes its increment from a register.
define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) {
; CHECK-LABEL: test_vst3_lane_reg_update:
; CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [3 x <4 x i16>] %b, 0
  %2 = extractvalue [3 x <4 x i16>] %b, 1
  %3 = extractvalue [3 x <4 x i16>] %b, 2
  %4 = bitcast i16* %a to i8*
  tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr i16* %a, i32 %inc
  store i16* %tmp1, i16** %ptr
  ret void
}
; Post-indexed ST4 (single lane), immediate writeback.
; Stores lane 1 of the four <2 x double> vectors passed in %b.coerce. The
; pointer stored to %ptr is %a + 4 double elements (32 bytes), matching the
; #32 writeback immediate in the expected instruction.
define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) {
; CHECK-LABEL: test_vst4_lane_fx_update:
; CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}], #32
  %1 = extractvalue [4 x <2 x double>] %b.coerce, 0
  %2 = extractvalue [4 x <2 x double>] %b.coerce, 1
  %3 = extractvalue [4 x <2 x double>] %b.coerce, 2
  %4 = extractvalue [4 x <2 x double>] %b.coerce, 3
  %5 = bitcast double* %a to i8*
  tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
  ; Pointer update the test expects to be folded into the store as writeback.
  %tmp1 = getelementptr double* %a, i32 4
  store double* %tmp1, double** %ptr
  ret void
}
; Post-indexed ST4 (single lane), register writeback.
; Stores lane 1 of the four <2 x float> vectors passed in %b.coerce. The
; pointer stored to %ptr is %a advanced by %inc float elements, so the
; expected instruction takes its increment from a register.
define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) {
; CHECK-LABEL: test_vst4_lane_reg_update:
; CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
  %1 = extractvalue [4 x <2 x float>] %b.coerce, 0
  %2 = extractvalue [4 x <2 x float>] %b.coerce, 1
  %3 = extractvalue [4 x <2 x float>] %b.coerce, 2
  %4 = extractvalue [4 x <2 x float>] %b.coerce, 3
  %5 = bitcast float* %a to i8*
  tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4)
  ; Variable pointer update the test expects to be folded in as writeback.
  %tmp1 = getelementptr float* %a, i32 %inc
  store float* %tmp1, float** %ptr
  ret void
}
; Intrinsic declarations used by the tests in this file.
; Each takes an i8* base pointer, N vectors, then two i32 operands. The first
; i32 is the lane index (callers pass the lane shown in the expected
; instruction); the second looks like an alignment in bytes - TODO confirm.

; Two-, three- and four-vector single-lane loads.
declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)

; Two-, three- and four-vector single-lane stores.
declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)