; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX
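; The same IR is run twice: the SSE prefix checks the legacy two-operand compare
; encodings, the AVX prefix checks the VEX three-operand forms.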
; Only equal/not-equal/ordered/unordered can be safely commuted
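; oeq/une/ord/uno give the same result with their operands swapped, so the loaded
; value can be folded directly as the second (memory) operand of cmpps/vcmpps even
; though it is the first operand in the IR. olt/ole are not symmetric: folding the
; load would require switching to a gt/ge predicate, which this fold does not do,
; so the load is kept in a register and the operand order is preserved (see the
; lt/le tests below).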
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_eq
  ;SSE: cmpeqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_eq
  ;AVX: vcmpeqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ne
  ;SSE: cmpneqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ne
  ;AVX: vcmpneqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ord
  ;SSE: cmpordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ord
  ;AVX: vcmpordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_uno
  ;SSE: cmpunordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_uno
  ;AVX: vcmpunordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

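; For the non-symmetric predicates below, the loaded value must stay as the first
; compare operand. The destructive two-operand SSE form leaves the result in the
; register holding the load (%xmm1), so an extra movaps/movapd copies it back to
; the return register. The three-operand AVX form writes straight to %xmm0 and
; needs no extra move.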
define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_lt
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpltps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_lt
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_le
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpleps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_le
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_eq_ymm
  ;AVX: vcmpeqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ne_ymm
  ;AVX: vcmpneqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ord_ymm
  ;AVX: vcmpordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_uno_ymm
  ;AVX: vcmpunordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_lt_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_le_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

; Only equal/not-equal/ordered/unordered can be safely commuted
define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_eq
  ;SSE: cmpeqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_eq
  ;AVX: vcmpeqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ne
  ;SSE: cmpneqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ne
  ;AVX: vcmpneqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ord
  ;SSE: cmpordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ord
  ;AVX: vcmpordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_uno
  ;SSE: cmpunordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_uno
  ;AVX: vcmpunordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_lt
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmpltpd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_lt
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_le
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmplepd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_le
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_eq_ymm
  ;AVX: vcmpeqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ne_ymm
  ;AVX: vcmpneqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ord_ymm
  ;AVX: vcmpordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_uno_ymm
  ;AVX: vcmpunordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_lt_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_le_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

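; Every function above references attribute group #0, which is not defined in this
; excerpt; a minimal definition is assumed here so the file parses standalone.
attributes #0 = { nounwind }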