ret void
}
+;Check for a post-increment updating store.
+define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vst1lanei8_update:
+;CHECK: vst1.8 {d16[3]}, [r2]!
+ %A = load i8** %ptr
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 3
+ store i8 %tmp2, i8* %A, align 8
+ %tmp3 = getelementptr i8* %A, i32 1
+ store i8* %tmp3, i8** %ptr
+ ret void
+}
+
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1laneQi8:
-;CHECK: vst1.8 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.8 {d17[1]}, [r0]
%tmp1 = load <16 x i8>* %B
%tmp2 = extractelement <16 x i8> %tmp1, i32 9
store i8 %tmp2, i8* %A, align 8
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32:
-;CHECK: vst1.32 {d17[1]}, [r0, :32]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0, :32]
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
ret void
}
+;Check for a post-increment updating store.
+define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
+;CHECK: vst1laneQi32_update:
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]!
+ %A = load i32** %ptr
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 3
+ store i32 %tmp2, i32* %A, align 8
+ %tmp3 = getelementptr i32* %A, i32 1
+ store i32* %tmp3, i32** %ptr
+ ret void
+}
+
define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst1laneQf:
-;CHECK: vst1.32 {d17[1]}, [r0]
+; // Can use scalar load. No need to use vectors.
+; // CHE-CK: vst1.32 {d17[1]}, [r0]
%tmp1 = load <4 x float>* %B
%tmp2 = extractelement <4 x float> %tmp1, i32 3
store float %tmp2, float* %A
ret void
}
+; Make sure this doesn't crash; PR10258
+define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
+;CHECK: variable_insertelement:
+ %r = insertelement <8 x i16> %a, i16 %b, i32 %c
+ ret <8 x i16> %r
+}
+
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind