1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
2 ; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
; Loads a 64-bit vector (<8 x i8>, one D register) straight from %A via the
; v8i8 vld1 intrinsic. The intrinsic's i32 operand is 16, but the expected
; assembly only carries a :64 qualifier, so this case exercises clamping of an
; over-large alignment request (operand presumably in bytes -- the 8 -> :64
; pairing in vld1Qi8 is consistent with that; confirm against the intrinsic docs).
4 define <8 x i8> @vld1i8(i8* %A) nounwind {
6 ;Check the alignment value. Max for this instruction is 64 bits:
7 ;CHECK: vld1.8 {d16}, [r0:64]
8 %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
; Loads <4 x i16> through the v4i16 vld1 intrinsic. The typed i16* argument is
; first cast to i8*, which is the address type the intrinsic is declared with.
; The i32 operand is 1, the smallest alignment value used in this file.
12 define <4 x i16> @vld1i16(i16* %A) nounwind {
13 ;CHECK-LABEL: vld1i16:
15 %tmp0 = bitcast i16* %A to i8*
16 %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
20 ;Check for a post-increment updating load.
; Takes a pointer-to-pointer so the advanced address can be written back.
; NOTE(review): the definition of %A is not among the lines visible here;
; presumably it is loaded from %ptr -- confirm.
21 define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
22 ;CHECK-LABEL: vld1i16_update:
; The trailing '!' in the expected operand is the writeback (post-increment)
; form of the address.
23 ;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
25 %tmp0 = bitcast i16* %A to i8*
26 %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
; Advance by 4 i16 elements, i.e. exactly the size of the <4 x i16> just
; loaded, and store the new address back through %ptr.
27 %tmp2 = getelementptr i16* %A, i32 4
28 store i16* %tmp2, i16** %ptr
; Loads <2 x i32> through the v2i32 vld1 intrinsic after casting the i32*
; argument to the i8* address type the intrinsic takes; the i32 operand is 1.
32 define <2 x i32> @vld1i32(i32* %A) nounwind {
33 ;CHECK-LABEL: vld1i32:
35 %tmp0 = bitcast i32* %A to i8*
36 %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
40 ;Check for a post-increment updating load with register increment.
; Variant of the updating load where the step is the runtime value %inc rather
; than a constant.
; NOTE(review): the definition of %A is not among the lines visible here;
; presumably it is loaded from %ptr -- confirm.
41 define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
42 ;CHECK-LABEL: vld1i32_update:
; Expected form: base register in brackets followed by a second register that
; supplies the increment.
43 ;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
45 %tmp0 = bitcast i32* %A to i8*
46 %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
; Advance the pointer by %inc i32 elements and store it back through %ptr.
47 %tmp2 = getelementptr i32* %A, i32 %inc
48 store i32* %tmp2, i32** %ptr
; Floating-point counterpart of the 64-bit loads: reads <2 x float> through the
; v2f32 vld1 intrinsic after casting the float* argument to i8*; the i32
; operand is 1.
52 define <2 x float> @vld1f(float* %A) nounwind {
55 %tmp0 = bitcast float* %A to i8*
56 %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1)
; Loads a single-element <1 x i64> vector through the v1i64 vld1 intrinsic
; after casting the i64* argument to i8*; the i32 operand is 1.
60 define <1 x i64> @vld1i64(i64* %A) nounwind {
61 ;CHECK-LABEL: vld1i64:
63 %tmp0 = bitcast i64* %A to i8*
64 %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
; Loads a 128-bit vector (<16 x i8>, register pair d16/d17) directly from %A.
; The i32 operand passed to the intrinsic is 8, which is below the 128-bit
; maximum mentioned in the note inside the function; that is why the expected
; qualifier is :64 and not :128.
68 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
69 ;CHECK-LABEL: vld1Qi8:
70 ;Check the alignment value. Max for this instruction is 128 bits:
71 ;CHECK: vld1.8 {d16, d17}, [r0:64]
72 %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
76 ;Check for a post-increment updating load.
; 128-bit updating load: combines an alignment qualifier with address writeback.
; NOTE(review): the definition of %A is not among the lines visible here;
; presumably it is loaded from %ptr -- confirm.
77 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
78 ;CHECK-LABEL: vld1Qi8_update:
; Expected operand has both the :64 qualifier (operand 8 below) and the
; trailing '!' writeback marker.
79 ;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
81 %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
; Advance by 16 i8 elements, the size of the <16 x i8> just loaded, and store
; the new address back through %ptr.
82 %tmp2 = getelementptr i8* %A, i32 16
83 store i8* %tmp2, i8** %ptr
; 128-bit load of <8 x i16>. The intrinsic is called with an i32 operand of 32,
; while the expected assembly carries :128, so this case exercises clamping of
; an over-large alignment request to the instruction's stated maximum.
87 define <8 x i16> @vld1Qi16(i16* %A) nounwind {
88 ;CHECK-LABEL: vld1Qi16:
89 ;Check the alignment value. Max for this instruction is 128 bits:
90 ;CHECK: vld1.16 {d16, d17}, [r0:128]
91 %tmp0 = bitcast i16* %A to i8*
92 %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
; Loads <4 x i32> through the v4i32 vld1 intrinsic after casting the i32*
; argument to i8*; the i32 operand is 1.
96 define <4 x i32> @vld1Qi32(i32* %A) nounwind {
97 ;CHECK-LABEL: vld1Qi32:
99 %tmp0 = bitcast i32* %A to i8*
100 %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
; Floating-point counterpart of the 128-bit loads: reads <4 x float> through
; the v4f32 vld1 intrinsic (float* cast to i8*, i32 operand 1) and returns the
; loaded vector unchanged.
104 define <4 x float> @vld1Qf(float* %A) nounwind {
105 ;CHECK-LABEL: vld1Qf:
107 %tmp0 = bitcast float* %A to i8*
108 %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1)
109 ret <4 x float> %tmp1
; Loads <2 x i64> through the v2i64 vld1 intrinsic after casting the i64*
; argument to i8*; the i32 operand is 1.
112 define <2 x i64> @vld1Qi64(i64* %A) nounwind {
113 ;CHECK-LABEL: vld1Qi64:
115 %tmp0 = bitcast i64* %A to i8*
116 %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
; Declarations of the vld1 intrinsics called by the tests in this file. Every
; variant takes an i8* address plus an i32 operand (the alignment value the
; tests vary) and is marked nounwind readonly.
; 64-bit result vectors:
120 declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
121 declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
122 declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
123 declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
124 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
; 128-bit result vectors:
126 declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
127 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
128 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
129 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
130 declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
133 ; Do not crash if the vld1 result is not used.
; Crash regression: the vld1 result %0 is not referenced by any instruction
; visible here, the address operand is undef, and the function then calls
; llvm.trap. No output pattern is attached in the visible lines, so the case
; appears to pass as long as llc completes.
134 define void @unused_vld1_result() {
136 %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1)
137 call void @llvm.trap()
; Trap intrinsic called by @unused_vld1_result above.
141 declare void @llvm.trap() nounwind