1 ; RUN: opt < %s -instcombine -S | FileCheck %s
; Baseline: extrq with two non-constant operands. The expected output is the
; same call followed by a ret of its result, i.e. no fold is applied.
7 define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
8 ; CHECK-LABEL: @test_extrq_call
9 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
10 ; CHECK-NEXT: ret <2 x i64> %1
11 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
; extrq whose first operand is zeroinitializer: the call is expected to fold
; away to the constant <i64 0, i64 undef>, regardless of the non-constant %y.
15 define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
16 ; CHECK-LABEL: @test_extrq_zero_arg0
17 ; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
18 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
; extrq whose second operand is zeroinitializer: the call is expected to fold
; away so that the function returns the first operand %x directly.
22 define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
23 ; CHECK-LABEL: @test_extrq_zero_arg1
24 ; CHECK-NEXT: ret <2 x i64> %x
25 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
; extrq with a non-constant first operand and a constant second operand whose
; two leading bytes are 8 and 15: the call is expected to be rewritten to the
; immediate form extrqi(%x, i8 8, i8 15). The expected call line is written
; without the tail marker.
29 define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
30 ; CHECK-LABEL: @test_extrq_to_extqi
31 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
32 ; CHECK-NEXT: ret <2 x i64> %1
33 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; extrq with both operands constant (<i64 -1, i64 55> and leading bytes 8, 15):
; the call is expected to constant-fold to <i64 255, i64 undef>.
37 define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
38 ; CHECK-LABEL: @test_extrq_constant
39 ; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
40 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; extrq with a constant first operand whose upper element is undef
; (<i64 -1, i64 undef>) and a constant second operand with leading bytes 16, 15:
; the call is still expected to constant-fold, here to <i64 65535, i64 undef>.
44 define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
45 ; CHECK-LABEL: @test_extrq_constant_undef
46 ; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
47 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; Baseline: extrqi on a non-constant vector with immediates 8 and 23. The
; expected output is the same call followed by a ret of its result.
55 define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
56 ; CHECK-LABEL: @test_extrqi_call
57 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
58 ; CHECK-NEXT: ret <2 x i64> %1
59 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
; extrqi with immediates 32 and 32 is expected to be rewritten as a byte
; shuffle wrapped in bitcasts: result bytes 0-3 come from bytes 4-7 of %x,
; result bytes 4-7 select the zero bytes of the constant second shuffle
; operand (mask indices 20-23), and result bytes 8-15 are undef.
63 define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
64 ; CHECK-LABEL: @test_extrqi_shuffle_1zuu
65 ; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
66 ; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
67 ; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
68 ; CHECK-NEXT: ret <2 x i64> %3
69 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
; extrqi with immediates 8 and 16 is expected to be rewritten as a byte
; shuffle wrapped in bitcasts: result byte 0 comes from byte 2 of %x, result
; bytes 1-7 select the zero bytes of the constant second shuffle operand
; (mask indices 17-23), and result bytes 8-15 are undef.
73 define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
74 ; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
75 ; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
76 ; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
77 ; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
78 ; CHECK-NEXT: ret <2 x i64> %3
79 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
; extrqi with immediates 32 and 33 is expected to fold to a fully undef
; vector, even though the vector operand is zeroinitializer.
; NOTE(review): 32 + 33 exceeds 64, which is presumably why the result is
; treated as undefined - confirm against the instruction definition.
83 define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
84 ; CHECK-LABEL: @test_extrqi_undef
85 ; CHECK-NEXT: ret <2 x i64> undef
86 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
; extrqi on zeroinitializer with immediates 3 and 18: the call is expected to
; constant-fold to <i64 0, i64 undef>.
90 define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
91 ; CHECK-LABEL: @test_extrqi_zero
92 ; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
93 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
; extrqi on the constant <i64 -1, i64 55> with immediates 3 and 18: the call
; is expected to constant-fold to <i64 7, i64 undef>.
97 define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
98 ; CHECK-LABEL: @test_extrqi_constant
99 ; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
100 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
; extrqi on a constant whose upper element is undef (<i64 -1, i64 undef>) with
; immediates 4 and 18: the call is still expected to constant-fold, here to
; <i64 15, i64 undef>.
104 define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
105 ; CHECK-LABEL: @test_extrqi_constant_undef
106 ; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
107 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
; Baseline: insertq with two non-constant operands. The expected output is the
; same call followed by a ret of its result, i.e. no fold is applied.
115 define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
116 ; CHECK-LABEL: @test_insertq_call
117 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
118 ; CHECK-NEXT: ret <2 x i64> %1
119 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
; insertq with a non-constant first operand and the constant second operand
; <i64 8, i64 658>: the call is expected to be rewritten to the immediate form
; insertqi(%x, <i64 8, i64 undef>, i8 18, i8 2). The expected call line is
; written without the tail marker.
; NOTE(review): 658 is 0x292, whose low six bits are 18 and whose bits from
; bit 8 upward are 2 - presumably how the two immediates are derived; confirm
; against the instruction definition.
123 define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
124 ; CHECK-LABEL: @test_insertq_to_insertqi
125 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
126 ; CHECK-NEXT: ret <2 x i64> %1
127 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
; insertq with both operands constant (<i64 0, i64 0> and <i64 8, i64 658>):
; the call is expected to constant-fold to <i64 32, i64 undef>.
131 define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
132 ; CHECK-LABEL: @test_insertq_constant
133 ; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
134 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
; insertq with a constant first operand whose upper element is undef
; (<i64 1, i64 undef>) and the constant second operand <i64 8, i64 658>: the
; call is still expected to constant-fold, here to <i64 33, i64 undef>.
138 define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
139 ; CHECK-LABEL: @test_insertq_constant_undef
140 ; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
141 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
; insertqi with immediates 32 and 32, surrounded by bitcasts from and to
; <16 x i8>: the whole sequence is expected to collapse into one byte shuffle
; of %v and %i. Result bytes 0-3 come from bytes 0-3 of %v, result bytes 4-7
; come from bytes 0-3 of %i (mask indices 16-19), and bytes 8-15 are undef.
149 define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
150 ; CHECK-LABEL: @test_insertqi_shuffle_04uu
151 ; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
152 ; CHECK-NEXT: ret <16 x i8> %1
153 %1 = bitcast <16 x i8> %v to <2 x i64>
154 %2 = bitcast <16 x i8> %i to <2 x i64>
155 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
156 %4 = bitcast <2 x i64> %3 to <16 x i8>
; insertqi with immediates 16 and 0, surrounded by bitcasts from and to
; <16 x i8>: the whole sequence is expected to collapse into one byte shuffle
; of %v and %i. Result bytes 0-1 come from bytes 0-1 of %i (mask indices
; 16-17), result bytes 2-7 come from bytes 2-7 of %v, and bytes 8-15 are undef.
160 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
161 ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
162 ; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
163 ; CHECK-NEXT: ret <16 x i8> %1
164 %1 = bitcast <16 x i8> %v to <2 x i64>
165 %2 = bitcast <16 x i8> %i to <2 x i64>
166 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
167 %4 = bitcast <2 x i64> %3 to <16 x i8>
; insertqi with both vector operands constant (<i64 -1, i64 -1> and
; <i64 8, i64 0>) and immediates 16 and 1: the call is expected to
; constant-fold to <i64 -131055, i64 undef>.
171 define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
172 ; CHECK-LABEL: @test_insertqi_constant
173 ; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
174 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
178 ; The result of this insert is the second arg, since the top 64 bits of
179 ; the result are undefined, and we copy the bottom 64 bits from the second arg.
; Concretely: insertqi with immediates 64 and 0 is expected to fold away so
; that the function returns %i directly.
181 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
182 ; CHECK-LABEL: @testInsert64Bits
183 ; CHECK-NEXT: ret <2 x i64> %i
184 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
; insertqi with both immediates equal to 0 is expected to fold away so that
; the function returns %i directly - the same expected result as the
; 64, 0 immediates used by testInsert64Bits.
188 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
189 ; CHECK-LABEL: @testZeroLength
190 ; CHECK-NEXT: ret <2 x i64> %i
191 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
; insertqi with immediates 0 and 16 on non-constant operands is expected to
; fold to a fully undef result.
; NOTE(review): presumably this immediate pair is an undefined combination for
; the instruction - confirm against the instruction definition.
195 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
196 ; CHECK-LABEL: @testUndefinedInsertq_1
197 ; CHECK-NEXT: ret <2 x i64> undef
198 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
; insertqi with immediates 48 and 32 on non-constant operands is expected to
; fold to a fully undef result.
; NOTE(review): 48 + 32 exceeds 64, which is presumably why the result is
; treated as undefined - confirm against the instruction definition.
202 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
203 ; CHECK-LABEL: @testUndefinedInsertq_2
204 ; CHECK-NEXT: ret <2 x i64> undef
205 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
; insertqi with immediates 64 and 16 on non-constant operands is expected to
; fold to a fully undef result.
; NOTE(review): 64 + 16 exceeds 64, which is presumably why the result is
; treated as undefined - confirm against the instruction definition.
209 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
210 ; CHECK-LABEL: @testUndefinedInsertq_3
211 ; CHECK-NEXT: ret <2 x i64> undef
212 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
217 ; Vector Demanded Bits
; The first extrq operand is produced by a shuffle that copies element 0 of %x
; into both lanes. The expected output calls extrq on %x directly, i.e. the
; shuffle feeding the first operand is removed.
220 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
221 ; CHECK-LABEL: @test_extrq_arg0
222 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
223 ; CHECK-NEXT: ret <2 x i64> %1
224 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
225 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
; The second extrq operand is produced by a shuffle that keeps bytes 0-7 of %y
; and fills bytes 8-15 with copies of byte 0. The expected output calls extrq
; on %y directly, i.e. the shuffle feeding the second operand is removed.
229 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
230 ; CHECK-LABEL: @test_extrq_arg1
231 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
232 ; CHECK-NEXT: ret <2 x i64> %1
233 %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
234 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
; Both extrq operands are produced by shuffles that only rewrite the upper
; half of their input (element 1 of %x, bytes 8-15 of %y). The expected output
; calls extrq on %x and %y directly, i.e. both shuffles are removed.
238 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
239 ; CHECK-LABEL: @test_extrq_args01
240 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
241 ; CHECK-NEXT: ret <2 x i64> %1
242 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
243 %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
244 %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
; The extrq result is only consumed through a shuffle that reads element 1
; into both lanes. The expected output is a bare ret of undef, with both the
; call and the shuffle gone.
248 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
249 ; CHECK-LABEL: @test_extrq_ret
250 ; CHECK-NEXT: ret <2 x i64> undef
251 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
252 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The extrqi vector operand is produced by a shuffle that copies element 0 of
; %x into both lanes. The expected output calls extrqi on %x directly with the
; same immediates (3, 2), i.e. the shuffle is removed.
256 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
257 ; CHECK-LABEL: @test_extrqi_arg0
258 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
259 ; CHECK-NEXT: ret <2 x i64> %1
260 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
261 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
; The extrqi result is only consumed through a shuffle that reads element 1
; into both lanes. The expected output is a bare ret of undef, with both the
; call and the shuffle gone.
265 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
266 ; CHECK-LABEL: @test_extrqi_ret
267 ; CHECK-NEXT: ret <2 x i64> undef
268 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
269 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The first insertq operand is produced by a shuffle that copies element 0 of
; %x into both lanes. The expected output calls insertq on %x directly, i.e.
; the shuffle feeding the first operand is removed.
273 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
274 ; CHECK-LABEL: @test_insertq_arg0
275 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
276 ; CHECK-NEXT: ret <2 x i64> %1
277 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
278 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
; The insertq result is only consumed through a shuffle that reads element 1
; into both lanes. The expected output is a bare ret of undef, with both the
; call and the shuffle gone.
282 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
283 ; CHECK-LABEL: @test_insertq_ret
284 ; CHECK-NEXT: ret <2 x i64> undef
285 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
286 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The first insertqi operand is produced by a shuffle that copies element 0 of
; %x into both lanes. The expected output calls insertqi on %x directly with
; the same immediates (3, 2), i.e. the shuffle is removed.
290 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
291 ; CHECK-LABEL: @test_insertqi_arg0
292 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
293 ; CHECK-NEXT: ret <2 x i64> %1
294 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
295 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
; The second insertqi operand is produced by a shuffle that copies element 0
; of %y into both lanes. The expected output calls insertqi on %y directly
; with the same immediates (3, 2), i.e. the shuffle is removed.
299 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
300 ; CHECK-LABEL: @test_insertqi_arg1
301 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
302 ; CHECK-NEXT: ret <2 x i64> %1
303 %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
304 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
; Both insertqi vector operands are produced by shuffles that copy element 0
; of their input into both lanes. The expected output calls insertqi on %x and
; %y directly with the same immediates (3, 2), i.e. both shuffles are removed.
308 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
309 ; CHECK-LABEL: @test_insertqi_args01
310 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
311 ; CHECK-NEXT: ret <2 x i64> %1
312 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
313 %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
314 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
; The insertqi result is only consumed through a shuffle that reads element 1
; into both lanes. The expected output is a bare ret of undef, with both the
; call and the shuffle gone.
318 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
319 ; CHECK-LABEL: @test_insertqi_ret
320 ; CHECK-NEXT: ret <2 x i64> undef
321 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
322 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Declarations of the four SSE4A intrinsics exercised by the tests in this
; file. Each one is paired with a FileCheck directive that expects a matching
; declare line to appear in the optimized output.
326 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
327 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
329 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
330 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
332 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
333 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
335 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
336 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind