1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
; Scalar 32-bit byte swap: passes %a through the llvm.bswap.i32 intrinsic.
; NOTE(review): the expected-instruction line and the return are not visible
; in this excerpt; presumably a single REV on a w-register is expected - confirm.
3 define i32 @test_rev_w(i32 %a) nounwind {
5 ; CHECK-LABEL: test_rev_w:
7 %0 = tail call i32 @llvm.bswap.i32(i32 %a)
; Scalar 64-bit byte swap: passes %a through the llvm.bswap.i64 intrinsic.
; NOTE(review): the expected-instruction line and the return are not visible
; in this excerpt; presumably a single REV on an x-register is expected - confirm.
11 define i64 @test_rev_x(i64 %a) nounwind {
13 ; CHECK-LABEL: test_rev_x:
15 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
; Declarations of the scalar byte-swap intrinsics (32-bit and 64-bit) called
; by the scalar tests in this file.
19 declare i32 @llvm.bswap.i32(i32) nounwind readnone
20 declare i64 @llvm.bswap.i64(i64) nounwind readnone
; 32-bit test that swaps the two bytes inside each 16-bit half of %X using
; shifts, masks and ORs (byte 0 <-> byte 1, byte 2 <-> byte 3).
; NOTE(review): the expected-instruction line and the return are not visible
; in this excerpt.
22 define i32 @test_rev16_w(i32 %X) nounwind {
24 ; CHECK-LABEL: test_rev16_w:
; %tmp1 = %X shifted right by one byte.
26 %tmp1 = lshr i32 %X, 8
; No-op bitcast (i32 to i32): %X15 carries the same value as %X.
27 %X15 = bitcast i32 %X to i32
; %tmp4 = %X shifted left by one byte.
28 %tmp4 = shl i32 %X15, 8
; 16711680 = 0x00FF0000: keeps original byte 3, now sitting in byte 2.
29 %tmp2 = and i32 %tmp1, 16711680
; -16777216 = 0xFF000000: keeps original byte 2, now sitting in byte 3.
30 %tmp5 = and i32 %tmp4, -16777216
; 255 = 0x000000FF: keeps original byte 1, now sitting in byte 0.
31 %tmp9 = and i32 %tmp1, 255
; 65280 = 0x0000FF00: keeps original byte 0, now sitting in byte 1.
32 %tmp13 = and i32 %tmp4, 65280
; OR the four isolated bytes back together into one 32-bit value.
33 %tmp6 = or i32 %tmp5, %tmp2
34 %tmp10 = or i32 %tmp6, %tmp13
35 %tmp14 = or i32 %tmp10, %tmp9
39 ; 64-bit REV16 is *not* a swap then a 16-bit rotation:
40 ; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
41 ; 01234567 ->(rev16) 10325476
; Negative test: starts from a 64-bit llvm.bswap.i64 of %a and requires that
; the output does NOT contain `rev16 x0, x0` (see the negative directive below).
; NOTE(review): the instructions following the bswap are not visible in this
; excerpt; per the comment preceding this function they presumably form a
; 16-bit rotate - confirm.
42 define i64 @test_rev16_x(i64 %a) nounwind {
44 ; CHECK-LABEL: test_rev16_x:
45 ; CHECK-NOT: rev16 x0, x0
46 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
; 64-bit test that begins with llvm.bswap.i64 of %a.
; NOTE(review): the instructions after the bswap and the expected-instruction
; line are not visible in this excerpt; the name suggests a REV32 on an
; x-register is expected - confirm.
53 define i64 @test_rev32_x(i64 %a) nounwind {
55 ; CHECK-LABEL: test_rev32_x:
57 %0 = tail call i64 @llvm.bswap.i64(i64 %a)
; Loads an <8 x i8> vector from %A and shuffles it with mask <7..0>, i.e. a
; full reversal of the eight bytes of the 64-bit vector.
; NOTE(review): expected-instruction line and return are not visible here.
64 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
65 ;CHECK-LABEL: test_vrev64D8:
67 %tmp1 = load <8 x i8>* %A
68 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Loads a <4 x i16> vector from %A and shuffles it with mask <3,2,1,0>, i.e.
; reverses the four 16-bit lanes of the 64-bit vector.
; NOTE(review): expected-instruction line and return are not visible here.
72 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
73 ;CHECK-LABEL: test_vrev64D16:
75 %tmp1 = load <4 x i16>* %A
76 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Loads a <2 x i32> vector from %A and shuffles it with mask <1,0>, i.e. swaps
; the two 32-bit lanes of the 64-bit vector.
; NOTE(review): expected-instruction line and return are not visible here.
80 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
81 ;CHECK-LABEL: test_vrev64D32:
83 %tmp1 = load <2 x i32>* %A
84 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; Floating-point variant of the two-lane swap: loads a <2 x float> vector from
; %A and shuffles it with mask <1,0>.
; NOTE(review): expected-instruction line and return are not visible here.
88 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
89 ;CHECK-LABEL: test_vrev64Df:
91 %tmp1 = load <2 x float>* %A
92 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; Loads a <16 x i8> vector from %A and shuffles it with mask
; <7..0, 15..8>, i.e. reverses the bytes inside each 8-byte half independently.
; NOTE(review): expected-instruction line and return are not visible here.
96 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
97 ;CHECK-LABEL: test_vrev64Q8:
99 %tmp1 = load <16 x i8>* %A
100 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; Loads an <8 x i16> vector from %A and shuffles it with mask
; <3,2,1,0, 7,6,5,4>, i.e. reverses the 16-bit lanes inside each group of four.
; NOTE(review): expected-instruction line and return are not visible here.
104 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
105 ;CHECK-LABEL: test_vrev64Q16:
107 %tmp1 = load <8 x i16>* %A
108 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Loads a <4 x i32> vector from %A and shuffles it with mask <1,0, 3,2>, i.e.
; swaps the two 32-bit lanes inside each adjacent pair.
; NOTE(review): expected-instruction line and return are not visible here.
112 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
113 ;CHECK-LABEL: test_vrev64Q32:
115 %tmp1 = load <4 x i32>* %A
116 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Floating-point variant of the pairwise swap: loads a <4 x float> vector from
; %A, shuffles it with mask <1,0, 3,2>, and returns the shuffled vector.
; NOTE(review): the expected-instruction line is not visible in this excerpt.
120 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
121 ;CHECK-LABEL: test_vrev64Qf:
123 %tmp1 = load <4 x float>* %A
124 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
125 ret <4 x float> %tmp2
; Loads an <8 x i8> vector from %A and shuffles it with mask
; <3,2,1,0, 7,6,5,4>, i.e. reverses the bytes inside each 4-byte group.
; NOTE(review): expected-instruction line and return are not visible here.
128 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
129 ;CHECK-LABEL: test_vrev32D8:
131 %tmp1 = load <8 x i8>* %A
132 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Loads a <4 x i16> vector from %A and shuffles it with mask <1,0, 3,2>, i.e.
; swaps the two 16-bit lanes inside each 32-bit group.
; NOTE(review): expected-instruction line and return are not visible here.
136 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
137 ;CHECK-LABEL: test_vrev32D16:
139 %tmp1 = load <4 x i16>* %A
140 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Loads a <16 x i8> vector from %A and shuffles it with mask
; <3..0, 7..4, 11..8, 15..12>, i.e. reverses the bytes inside each of the four
; 4-byte groups.
; NOTE(review): expected-instruction line and return are not visible here.
144 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
145 ;CHECK-LABEL: test_vrev32Q8:
147 %tmp1 = load <16 x i8>* %A
148 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; Loads an <8 x i16> vector from %A and shuffles it with mask
; <1,0, 3,2, 5,4, 7,6>, i.e. swaps the two 16-bit lanes inside each 32-bit group.
; NOTE(review): expected-instruction line and return are not visible here.
152 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
153 ;CHECK-LABEL: test_vrev32Q16:
155 %tmp1 = load <8 x i16>* %A
156 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Loads an <8 x i8> vector from %A and shuffles it with mask
; <1,0, 3,2, 5,4, 7,6>, i.e. swaps the two bytes inside each 16-bit group.
; NOTE(review): expected-instruction line and return are not visible here.
160 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
161 ;CHECK-LABEL: test_vrev16D8:
163 %tmp1 = load <8 x i8>* %A
164 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Loads a <16 x i8> vector from %A and shuffles it with a mask that swaps the
; two bytes inside each of the eight 16-bit groups (<1,0, 3,2, ..., 15,14>).
; NOTE(review): expected-instruction line and return are not visible here.
168 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
169 ;CHECK-LABEL: test_vrev16Q8:
171 %tmp1 = load <16 x i8>* %A
172 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
176 ; Undef shuffle indices should not prevent matching to VREV:
; Same full 8-byte reversal mask as <7,6,5,4,3,2,1,0>, except that result
; lanes 1 and 2 are left undef in the shuffle mask.
; NOTE(review): expected-instruction line and return are not visible here.
178 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
179 ;CHECK-LABEL: test_vrev64D8_undef:
181 %tmp1 = load <8 x i8>* %A
182 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
; Same adjacent-pair swap mask as <1,0, 3,2, 5,4, 7,6> on <8 x i16>, except
; that result lanes 0, 2 and 7 are left undef in the shuffle mask.
; NOTE(review): expected-instruction line and return are not visible here.
186 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
187 ;CHECK-LABEL: test_vrev32Q16_undef:
189 %tmp1 = load <8 x i16>* %A
190 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
194 ; vrev <4 x i16> should use REV32 and not REV64
; Reinterprets %source as a pointer to <8 x i16>, loads the vector, picks
; elements 6 and 5 (in that order) into a <2 x i16>, and stores the pair to %dst.
; The visible expectation only pins the load into a q-register, captured as DEST.
; NOTE(review): the remaining expected-instruction lines and the return are
; not visible in this excerpt.
195 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
196 ; CHECK-LABEL: test_vrev64:
197 ; CHECK: ldr [[DEST:q[0-9]+]],
; View the <4 x i16> pointer as an <8 x i16> pointer and load with 4-byte alignment.
201 %0 = bitcast <4 x i16>* %source to <8 x i16>*
202 %tmp2 = load <8 x i16>* %0, align 4
; Element 6 becomes result lane 0.
203 %tmp3 = extractelement <8 x i16> %tmp2, i32 6
204 %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
; Element 5 becomes result lane 1.
205 %tmp9 = extractelement <8 x i16> %tmp2, i32 5
206 %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
207 store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
211 ; Test vrev of float4
; Loads a <4 x float> from %source (via a pointer bitcast), builds a new vector
; whose lane 1 is the loaded vector's last element and whose lanes 0, 2 and 3
; are the constant 0.0, then stores it to the 12th <4 x float> slot of %dest
; (getelementptr index 11).
; The function label is anchored the same way as the other functions in this
; file, so the expectations that follow are scoped to this function's output.
; NOTE(review): the remaining expected-instruction lines and the return are
; not visible in this excerpt.
212 define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
213 ; CHECK-LABEL: float_vrev64:
214 ; CHECK: ldr [[DEST:q[0-9]+]],
; Treat the float pointer as a pointer to <4 x float> and load with 4-byte alignment.
217 %0 = bitcast float* %source to <4 x float>*
218 %tmp2 = load <4 x float>* %0, align 4
; Mask indices 0-3 select from the constant vector, 4-7 from %tmp2:
; <0, 7, 0, 0> = <0.0, %tmp2[3], 0.0, 0.0>.
219 %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
220 %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
221 store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
; Vector byte swap: passes the <4 x i32> argument through the
; llvm.bswap.v4i32 intrinsic.
; NOTE(review): the expected-instruction lines and the return are not visible
; in this excerpt.
226 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
227 ; CHECK-LABEL: test_vrev32_bswap:
231 %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
; Declaration of the <4 x i32> byte-swap intrinsic called by test_vrev32_bswap.
235 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone