1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; v16i32 'and': expect the dword integer-domain vpandd on both CPUs.
; The preceding add (checked as vpaddd with a {1to16} broadcast operand)
; pins the execution domain to integer so the AND is not turned into a
; float-domain op.
6 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
8 ; ALL: ## BB#0: ## %entry
9 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
10 ; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
13 ; Force the execution domain with an add.
14 %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
15 i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
16 %x = and <16 x i32> %a2, %b
; v16i32 'or': expect the dword integer-domain vpord; the vpaddd with a
; {1to16} broadcast operand keeps the computation in the integer domain.
20 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
22 ; ALL: ## BB#0: ## %entry
23 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
24 ; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
27 ; Force the execution domain with an add.
28 %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
29 i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
30 %x = or <16 x i32> %a2, %b
; v16i32 'xor': expect the dword integer-domain vpxord; the vpaddd with a
; {1to16} broadcast operand keeps the computation in the integer domain.
34 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
36 ; ALL: ## BB#0: ## %entry
37 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
38 ; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
41 ; Force the execution domain with an add.
42 %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
43 i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
44 %x = xor <16 x i32> %a2, %b
; v8i64 'and': expect the qword integer-domain vpandq; the vpaddq with a
; {1to8} broadcast operand keeps the computation in the integer domain.
48 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
50 ; ALL: ## BB#0: ## %entry
51 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
52 ; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
55 ; Force the execution domain with an add.
56 %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
57 %x = and <8 x i64> %a2, %b
; v8i64 'or': expect the qword integer-domain vporq; the vpaddq with a
; {1to8} broadcast operand keeps the computation in the integer domain.
61 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
63 ; ALL: ## BB#0: ## %entry
64 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
65 ; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
68 ; Force the execution domain with an add.
69 %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
70 %x = or <8 x i64> %a2, %b
; v8i64 'xor': expect the qword integer-domain vpxorq; the vpaddq with a
; {1to8} broadcast operand keeps the computation in the integer domain.
74 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
76 ; ALL: ## BB#0: ## %entry
77 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
78 ; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
81 ; Force the execution domain with an add.
82 %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
83 %x = xor <8 x i64> %a2, %b
; OR of a v8i64 with a splat constant <2,...,2>: the constant should be
; folded into vporq as a {1to8} broadcast memory operand instead of being
; materialized in a register.
88 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
89 ; ALL-LABEL: orq_broadcast:
91 ; ALL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
93 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; AND with a v16i32 value loaded from memory: the load should be folded
; straight into vpandd's memory operand (vpandd (%rdi), ...), with no
; separate vector load instruction.
97 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
98 ; ALL-LABEL: andd512fold:
99 ; ALL: ## BB#0: ## %entry
100 ; ALL-NEXT: vpandd (%rdi), %zmm0, %zmm0
103 %a = load <16 x i32>, <16 x i32>* %x, align 4
104 %b = and <16 x i32> %y, %a
; AND with a broadcast of a scalar i64 loaded from memory (the
; insertelement + shufflevector-with-zero-mask idiom): the whole splat
; should be folded into vpandq's {1to8} broadcast memory operand.
108 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
109 ; ALL-LABEL: andqbrst:
110 ; ALL: ## BB#0: ## %entry
111 ; ALL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
114 %a = load i64, i64* %ap, align 8
115 %b = insertelement <8 x i64> undef, i64 %a, i32 0
116 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
117 %d = and <8 x i64> %p1, %c
; v64i8 'and': on KNL the 512-bit byte vector is split into two 256-bit
; halves handled with float-domain vandps on ymm registers; on SKX it
; stays as a single 512-bit integer-domain vpandq on zmm registers.
121 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
122 ; KNL-LABEL: and_v64i8:
124 ; KNL-NEXT: vandps %ymm2, %ymm0, %ymm0
125 ; KNL-NEXT: vandps %ymm3, %ymm1, %ymm1
128 ; SKX-LABEL: and_v64i8:
130 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
132 %res = and <64 x i8> %a, %b
; v64i8 'or': KNL splits into two 256-bit vorps halves on ymm registers;
; SKX keeps a single 512-bit vporq on zmm registers.
136 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
137 ; KNL-LABEL: or_v64i8:
139 ; KNL-NEXT: vorps %ymm2, %ymm0, %ymm0
140 ; KNL-NEXT: vorps %ymm3, %ymm1, %ymm1
143 ; SKX-LABEL: or_v64i8:
145 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
147 %res = or <64 x i8> %a, %b
; v64i8 'xor': KNL splits into two 256-bit vxorps halves on ymm
; registers; SKX keeps a single 512-bit vpxorq on zmm registers.
151 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
152 ; KNL-LABEL: xor_v64i8:
154 ; KNL-NEXT: vxorps %ymm2, %ymm0, %ymm0
155 ; KNL-NEXT: vxorps %ymm3, %ymm1, %ymm1
158 ; SKX-LABEL: xor_v64i8:
160 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
162 %res = xor <64 x i8> %a, %b
; v32i16 'and': KNL splits into two 256-bit vandps halves on ymm
; registers; SKX keeps a single 512-bit vpandq on zmm registers.
166 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
167 ; KNL-LABEL: and_v32i16:
169 ; KNL-NEXT: vandps %ymm2, %ymm0, %ymm0
170 ; KNL-NEXT: vandps %ymm3, %ymm1, %ymm1
173 ; SKX-LABEL: and_v32i16:
175 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
177 %res = and <32 x i16> %a, %b
; v32i16 'or': KNL splits into two 256-bit vorps halves on ymm registers;
; SKX keeps a single 512-bit vporq on zmm registers.
181 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
182 ; KNL-LABEL: or_v32i16:
184 ; KNL-NEXT: vorps %ymm2, %ymm0, %ymm0
185 ; KNL-NEXT: vorps %ymm3, %ymm1, %ymm1
188 ; SKX-LABEL: or_v32i16:
190 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
192 %res = or <32 x i16> %a, %b
; v32i16 'xor': KNL splits into two 256-bit vxorps halves on ymm
; registers; SKX keeps a single 512-bit vpxorq on zmm registers.
196 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
197 ; KNL-LABEL: xor_v32i16:
199 ; KNL-NEXT: vxorps %ymm2, %ymm0, %ymm0
200 ; KNL-NEXT: vxorps %ymm3, %ymm1, %ymm1
203 ; SKX-LABEL: xor_v32i16:
205 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
207 %res = xor <32 x i16> %a, %b