1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
; Lane-wise add of two <4 x i64> vectors.
; NOTE(review): the function name suggests the expected instruction is vpaddq;
; the matching FileCheck directive is not visible in this excerpt - confirm.
4 define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
5 %x = add <4 x i64> %i, %j
; Lane-wise add of two <8 x i32> vectors.
; NOTE(review): the function name suggests the expected instruction is vpaddd;
; the matching FileCheck directive is not visible in this excerpt - confirm.
10 define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
11 %x = add <8 x i32> %i, %j
; Lane-wise add of two <16 x i16> vectors.
; NOTE(review): the function name suggests the expected instruction is vpaddw;
; the matching FileCheck directive is not visible in this excerpt - confirm.
16 define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
17 %x = add <16 x i16> %i, %j
; Lane-wise add of two <32 x i8> vectors.
; NOTE(review): the function name suggests the expected instruction is vpaddb;
; the matching FileCheck directive is not visible in this excerpt - confirm.
22 define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
23 %x = add <32 x i8> %i, %j
; Lane-wise subtract (%i - %j) of two <4 x i64> vectors.
; NOTE(review): the function name suggests the expected instruction is vpsubq;
; the matching FileCheck directive is not visible in this excerpt - confirm.
28 define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
29 %x = sub <4 x i64> %i, %j
; Lane-wise subtract (%i - %j) of two <8 x i32> vectors.
; NOTE(review): the function name suggests the expected instruction is vpsubd;
; the matching FileCheck directive is not visible in this excerpt - confirm.
34 define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
35 %x = sub <8 x i32> %i, %j
; Lane-wise subtract (%i - %j) of two <16 x i16> vectors.
; NOTE(review): the function name suggests the expected instruction is vpsubw;
; the matching FileCheck directive is not visible in this excerpt - confirm.
40 define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
41 %x = sub <16 x i16> %i, %j
; Lane-wise subtract (%i - %j) of two <32 x i8> vectors.
; NOTE(review): the function name suggests the expected instruction is vpsubb;
; the matching FileCheck directive is not visible in this excerpt - confirm.
46 define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
47 %x = sub <32 x i8> %i, %j
; Lane-wise multiply of two <8 x i32> vectors.
; NOTE(review): the function name suggests the expected instruction is vpmulld;
; the matching FileCheck directive is not visible in this excerpt - confirm.
52 define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
53 %x = mul <8 x i32> %i, %j
; Lane-wise multiply of two <16 x i16> vectors.
; NOTE(review): the function name suggests the expected instruction is vpmullw;
; the matching FileCheck directive is not visible in this excerpt - confirm.
58 define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
59 %x = mul <16 x i16> %i, %j
65 ; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
66 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
67 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
68 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
69 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
70 ; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
71 ; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
72 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
73 ; CHECK-NEXT: vzeroupper
; Lane-wise multiply of two <16 x i8> vectors.
; The FileCheck directives above expect a widen / multiply / narrow sequence:
;   1. vpmovsxbw sign-extends each 16-byte operand into 16-bit lanes of a ymm
;      register.
;   2. vpmullw multiplies the widened operands.
;   3. vextracti128 $1 pulls out the upper 128-bit half of the product, and the
;      vpshufb mask <0,2,4,...,14,u,...> keeps the even-numbered bytes of each
;      half (presumably the low byte of every 16-bit product).
;   4. vpunpcklqdq joins the low quadwords of the two shuffled halves in xmm0.
75 define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
76 %x = mul <16 x i8> %i, %j
82 ; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
83 ; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
84 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
85 ; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
86 ; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
87 ; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
88 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
89 ; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
90 ; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
91 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
92 ; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
93 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
94 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
95 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
96 ; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
97 ; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
98 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
99 ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; Lane-wise multiply of two <32 x i8> vectors.
; The FileCheck directives above expect the multiply to be done in two
; 16-byte pieces, each using sign-extension to 16-bit lanes (vpmovsxbw), a
; vpmullw, and a vpshufb/vpunpcklqdq narrowing step:
;   - first on the upper 128-bit halves of both operands (extracted with
;     vextracti128 $1), leaving the narrowed result in xmm2;
;   - then on the lower halves, leaving the narrowed result in xmm0.
; The even-byte shuffle mask is loaded once into xmm4 and reused by both
; pieces; vinserti128 $1 finally places xmm2 into the upper half of ymm0.
101 define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
102 %x = mul <32 x i8> %i, %j
107 ; CHECK: vpmuludq %ymm
108 ; CHECK-NEXT: vpsrlq $32, %ymm
109 ; CHECK-NEXT: vpmuludq %ymm
110 ; CHECK-NEXT: vpsllq $32, %ymm
111 ; CHECK-NEXT: vpaddq %ymm
112 ; CHECK-NEXT: vpsrlq $32, %ymm
113 ; CHECK-NEXT: vpmuludq %ymm
114 ; CHECK-NEXT: vpsllq $32, %ymm
115 ; CHECK-NEXT: vpaddq %ymm
; Lane-wise multiply of two <4 x i64> vectors.
; The FileCheck directives above expect the 64-bit multiply to be assembled
; from three vpmuludq partial products, combined with 32-bit shifts
; (vpsrlq/vpsllq $32) and two vpaddq instructions. Operands are matched only
; by the "%ymm" prefix, so the exact register assignment is not pinned.
116 define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
117 %x = mul <4 x i64> %i, %j
; Multiplies every lane of an <8 x i32> vector by the constant 2 (a splat
; power of two).
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
124 define <8 x i32> @mul_const1(<8 x i32> %x) {
125 %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; Multiplies every lane of a <4 x i64> vector by the constant 4 (a splat
; power of two).
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
132 define <4 x i64> @mul_const2(<4 x i64> %x) {
133 %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
; Multiplies every lane of a <16 x i16> vector by the constant 8 (a splat
; power of two).
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
140 define <16 x i16> @mul_const3(<16 x i16> %x) {
141 %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Multiplies every lane of a <4 x i64> vector by -1, which is arithmetically a
; negation of each lane.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
149 define <4 x i64> @mul_const4(<4 x i64> %x) {
150 %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
; Multiplies every lane of an <8 x i32> vector by 0, so the product is zero in
; every lane regardless of %x.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
157 define <8 x i32> @mul_const5(<8 x i32> %x) {
158 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Multiplies an <8 x i32> vector by a non-splat constant: lanes 3 and 5 are
; multiplied by 2, every other lane by 0.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
165 define <8 x i32> @mul_const6(<8 x i32> %x) {
166 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
; Multiplies every lane of an <8 x i64> vector by the constant 2. The vector
; type is 8 x 64 = 512 bits wide.
; NOTE(review): presumably this exercises a type wider than one AVX2 register;
; the expected lowering is not visible in this excerpt - confirm.
174 define <8 x i64> @mul_const7(<8 x i64> %x) {
175 %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; Multiplies every lane of an <8 x i16> vector by the constant 8 (a splat
; power of two). The vector type is 8 x 16 = 128 bits wide.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
182 define <8 x i16> @mul_const8(<8 x i16> %x) {
183 %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Multiplies an <8 x i32> vector by a non-splat constant: lane 0 is multiplied
; by 2, every other lane by 0.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
190 define <8 x i32> @mul_const9(<8 x i32> %x) {
191 %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Multiplies every lane of a <4 x i32> vector by 16843009, which is 0x01010101
; (the same byte value, 0x01, in all four bytes of the lane).
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
198 define <4 x i32> @mul_const10(<4 x i32> %x) {
200 %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
; Multiplies every lane of a <4 x i32> vector by 2155905152, which is
; 0x80808080 (the same byte value, 0x80, in all four bytes of the lane). The
; literal is written as an unsigned value: it exceeds the signed i32 maximum
; of 2147483647 but fits in 32 bits.
; NOTE(review): the expected lowering is not visible in this excerpt; confirm
; against the FileCheck directives for this function.
207 define <4 x i32> @mul_const11(<4 x i32> %x) {
209 %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>