test/CodeGen/X86/avx-vperm2x128.ll

   1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
   2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
   3
   4 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
   5 ; ALL-LABEL: A:
   6 ; ALL:       ## BB#0: ## %entry
   7 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   8 ; ALL-NEXT:    retq
   9 entry:
       ; The mask takes elements 4-7 of %a and then elements 0-3 of %a, i.e. it
       ; swaps the two 128-bit halves of %a; %b is never referenced by the mask.
       ; The checks above expect a single vperm2f128 for both feature sets.
  10   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  11   ret <8 x float> %shuffle
  12 }
  13
  14 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
  15 ; ALL-LABEL: B:
  16 ; ALL:       ## BB#0: ## %entry
  17 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
  18 ; ALL-NEXT:    retq
  19 entry:
       ; The mask keeps elements 0-3 of %a in the low half and takes elements
       ; 4-7 of %b (indices 12-15) for the high half. Neither half changes
       ; position, and the checks above expect a vblendpd instead of vperm2f128.
  20   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  21   ret <8 x float> %shuffle
  22 }
  23
  24 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
  25 ; ALL-LABEL: C:
  26 ; ALL:       ## BB#0: ## %entry
  27 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  28 ; ALL-NEXT:    retq
  29 entry:
       ; The mask repeats elements 0-3 of %a in both halves of the result; %b is
       ; unused. The checks above expect the low half to be inserted into the
       ; high half with vinsertf128.
  30   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  31   ret <8 x float> %shuffle
  32 }
  33
  34 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
  35 ; ALL-LABEL: D:
  36 ; ALL:       ## BB#0: ## %entry
  37 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  38 ; ALL-NEXT:    retq
  39 entry:
       ; The mask repeats elements 4-7 of %a in both halves of the result; %b is
       ; unused. The checks above expect a single vperm2f128 that duplicates the
       ; high half.
  40   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  41   ret <8 x float> %shuffle
  42 }
  43
  44 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
  45 ; ALL-LABEL: E:
  46 ; ALL:       ## BB#0: ## %entry
  47 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  48 ; ALL-NEXT:    retq
  49 entry:
       ; Byte-vector variant: the mask repeats bytes 16-31 of %a in both halves
       ; of the result; %b is unused. There is no integer arithmetic here, and
       ; the checks above expect vperm2f128 under both feature sets.
  50   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  51   ret <32 x i8> %shuffle
  52 }
  53
  54 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  55 ; ALL-LABEL: E2:
  56 ; ALL:       ## BB#0: ## %entry
  57 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  58 ; ALL-NEXT:    retq
  59 entry:
       ; The mask takes elements 2-3 of %b (indices 6-7) for the low half and
       ; elements 0-1 of %a for the high half. There is no integer arithmetic
       ; here, and the checks above expect vperm2f128 under both feature sets.
  60   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  61   ret <4 x i64> %shuffle
  62 }
  63
  64 define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
  65 ; AVX1-LABEL: Ei:
  66 ; AVX1:       ## BB#0: ## %entry
  67 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
  68 ; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
  69 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  70 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  71 ; AVX1-NEXT:    retq
  72 ;
  73 ; AVX2-LABEL: Ei:
  74 ; AVX2:       ## BB#0: ## %entry
  75 ; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
  76 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  77 ; AVX2-NEXT:    retq
  78 entry:
  79   ; add forces execution domain
  80   %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
       ; The mask repeats bytes 16-31 of %a2 in both halves of the result; %b is
       ; unused. With the integer add feeding the shuffle, the checks above
       ; expect vperm2i128 with +avx2 and vperm2f128 with +avx. The +avx checks
       ; also show the add done on a 128-bit register (only the extracted high
       ; half), while the +avx2 checks add on the full 256-bit register.
  81   %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  82   ret <32 x i8> %shuffle
  83 }
  84
  85 define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  86 ; AVX1-LABEL: E2i:
  87 ; AVX1:       ## BB#0: ## %entry
  88 ; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
  89 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  90 ; AVX1-NEXT:    retq
  91 ;
  92 ; AVX2-LABEL: E2i:
  93 ; AVX2:       ## BB#0: ## %entry
  94 ; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
  95 ; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
  96 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  97 ; AVX2-NEXT:    retq
  98 entry:
  99   ; add forces execution domain
 100   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
       ; The mask takes elements 2-3 of %b (indices 6-7) for the low half and
       ; elements 0-1 of %a2 for the high half. Only the low half of %a2 is
       ; used, and the +avx checks above add on a 128-bit register only. The
       ; permute is expected to be vperm2i128 with +avx2 and vperm2f128 with
       ; +avx.
 101   %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
 102   ret <4 x i64> %shuffle
 103 }
 104
 105 define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
 106 ; AVX1-LABEL: E3i:
 107 ; AVX1:       ## BB#0: ## %entry
 108 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 109 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 110 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 111 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 112 ; AVX1-NEXT:    retq
 113 ;
 114 ; AVX2-LABEL: E3i:
 115 ; AVX2:       ## BB#0: ## %entry
 116 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
 117 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 118 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 119 ; AVX2-NEXT:    retq
 120 entry:
 121   ; add forces execution domain
 122   %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
       ; Low half of the result: elements 5 and 7 of %a2 in their in-lane
       ; positions, with the other two slots undef, so the high half of %a2
       ; satisfies it. High half of the result: elements 4-7 of %b (indices
       ; 12-15). The checks above expect one 128-bit permute taking the high
       ; half of each source: vperm2i128 with +avx2, vperm2f128 with +avx.
 123   %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
 124   ret <8 x i32> %shuffle
 125 }
 126
 127 define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
 128 ; AVX1-LABEL: E4i:
 129 ; AVX1:       ## BB#0: ## %entry
 130 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 131 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 132 ; AVX1-NEXT:    retq
 133 ;
 134 ; AVX2-LABEL: E4i:
 135 ; AVX2:       ## BB#0: ## %entry
 136 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
 137 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 138 ; AVX2-NEXT:    retq
 139 entry:
 140   ; add forces execution domain
 141   %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
       ; The mask takes elements 0-7 of %b (indices 16-23) for the low half and
       ; elements 0-7 of %a2 for the high half. The checks above expect the low
       ; 128 bits of the sum to be inserted into the upper half of %b's
       ; register: vinserti128 with +avx2, vinsertf128 with +avx.
 142   %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 143   ret <16 x i16> %shuffle
 144 }
 145
 146 define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readonly ssp {
 147 ; AVX1-LABEL: E5i:
 148 ; AVX1:       ## BB#0: ## %entry
 149 ; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
 150 ; AVX1-NEXT:    vmovaps (%rsi), %ymm1
 151 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 152 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 153 ; AVX1-NEXT:    retq
 154 ;
 155 ; AVX2-LABEL: E5i:
 156 ; AVX2:       ## BB#0: ## %entry
 157 ; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
 158 ; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
 159 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
 160 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 161 ; AVX2-NEXT:    retq
 162 entry:
       ; Both operands are loaded through the pointer arguments, so the function
       ; is marked readonly rather than readnone (it reads memory but never
       ; writes it).
 163   %c = load <16 x i16>, <16 x i16>* %a
 164   %d = load <16 x i16>, <16 x i16>* %b
       ; Integer add on the first loaded vector before the shuffle.
 165   %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
       ; The mask takes elements 0-7 of %d (indices 16-23) for the low half and
       ; elements 0-7 of %c2 for the high half. The checks above expect the low
       ; 128 bits of the sum to be inserted into the upper half of %d's
       ; register: vinserti128 with +avx2, vinsertf128 with +avx.
 166   %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 167   ret <16 x i16> %shuffle
 168 }
 169
 170 ;;;; Cases with undef indices mixed in the mask
 171
 172 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 173 ; ALL-LABEL: F:
 174 ; ALL:       ## BB#0: ## %entry
 175 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 176 ; ALL-NEXT:    retq
 177 entry:
       ; Low half: elements 6 and 7 of %a in their in-lane positions, other
       ; slots undef. High half: elements 1 and 3 of %b (indices 9 and 11) in
       ; their in-lane positions, other slots undef. The defined elements are
       ; consistent with "high half of %a, low half of %b", and the checks above
       ; expect exactly that single vperm2f128.
 178   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
 179   ret <8 x float> %shuffle
 180 }
 181
 182 define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 183 ; ALL-LABEL: F2:
 184 ; ALL:       ## BB#0: ## %entry
 185 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 186 ; ALL-NEXT:    retq
 187 entry:
       ; Both halves of the result define only elements 6 and 7 of %a (the other
       ; slots are undef), so the high half of %a satisfies both halves. The
       ; checks above expect a single vperm2f128 duplicating that half.
 188   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
 189   ret <8 x float> %shuffle
 190 }
 191
 192 define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 193 ; ALL-LABEL: F3:
 194 ; ALL:       ## BB#0: ## %entry
 195 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 196 ; ALL-NEXT:    retq
 197 entry:
       ; Low half: elements 6 and 7 of %a, other slots undef. High half:
       ; elements 2 and 3 of %b (indices 10 and 11), other slots undef. The
       ; checks above expect a single vperm2f128 taking the high half of %a and
       ; the low half of %b.
 198   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
 199   ret <8 x float> %shuffle
 200 }
 201
 202 define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 203 ; ALL-LABEL: F4:
 204 ; ALL:       ## BB#0: ## %entry
 205 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 206 ; ALL-NEXT:    retq
 207 entry:
       ; Low half: elements 6 and 7 of %a, other slots undef. High half:
       ; elements 6 and 7 of %b (indices 14 and 15), other slots undef. The
       ; checks above expect a single vperm2f128 taking the high half of each
       ; source.
 208   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
 209   ret <8 x float> %shuffle
 210 }
 211
 212 define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 213 ; ALL-LABEL: F5:
 214 ; ALL:       ## BB#0: ## %entry
 215 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 216 ; ALL-NEXT:    retq
 217 entry:
       ; Low half: elements 6 and 7 of %a with two undef slots. High half: the
       ; fully specified elements 4-7 of %a. The checks above expect a single
       ; vperm2f128 duplicating the high half of %a; %b is unused.
 218   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
 219   ret <8 x float> %shuffle
 220 }
 221
 222 define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 223 ; ALL-LABEL: F6:
 224 ; ALL:       ## BB#0: ## %entry
 225 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 226 ; ALL-NEXT:    retq
 227 entry:
       ; Low half: elements 6 and 7 of %a with two undef slots. High half: the
       ; fully specified elements 0-3 of %b (indices 8-11). The checks above
       ; expect a single vperm2f128 taking the high half of %a and the low half
       ; of %b.
 228   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 229   ret <8 x float> %shuffle
 230 }
 231
 232 define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 233 ; ALL-LABEL: F7:
 234 ; ALL:       ## BB#0: ## %entry
 235 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 236 ; ALL-NEXT:    retq
 237 entry:
       ; Low half: the fully specified elements 4-7 of %a. High half: elements
       ; 6 and 7 of %a with two undef slots. The checks above expect a single
       ; vperm2f128 duplicating the high half of %a; %b is unused.
 238   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
 239   ret <8 x float> %shuffle
 240 }
 241
 242 define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 243 ; ALL-LABEL: F8:
 244 ; ALL:       ## BB#0: ## %entry
 245 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 246 ; ALL-NEXT:    retq
 247 entry:
       ; Low half: the fully specified elements 4-7 of %a. High half: elements
       ; 6 and 7 of %b (indices 14 and 15) with two undef slots. The checks
       ; above expect a single vperm2f128 taking the high half of each source.
 248   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
 249   ret <8 x float> %shuffle
 250 }
 251
 252 ;;;; Cases that must not be selected as a single vperm2f128
 253
 254 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 255 ; ALL-LABEL: G:
 256 ; ALL:       ## BB#0: ## %entry
 257 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 258 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
 259 ; ALL-NEXT:    retq
 260 entry:
       ; Low half: elements 6 and 7 of %a with two undef slots. High half: index
       ; 12 (element 4 of %b) sits in the second slot of the half, where a plain
       ; copy of %b's high half would have index 13. A 128-bit permute alone
       ; therefore cannot produce this result. The checks above expect
       ; vperm2f128 followed by an in-lane vpermilps that moves element 4 into
       ; that slot.
 261   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
 262   ret <8 x float> %shuffle
 263 }
 264
 265 ;; Test zero mask generation.
 266 ;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
 267 ;; Prefer xor+vblendpd over vperm2f128 because the xor+vblendpd sequence has better performance.
 268
 269 define <4 x double> @vperm2z_0x08(<4 x double> %a) {
 270 ; ALL-LABEL: vperm2z_0x08:
 271 ; ALL:       # BB#0:
 272 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 273 ; ALL-NEXT:    retq
       ; The second operand is a constant whose first two elements are 0.0.
       ; The mask puts those zeros (indices 4-5) in the low half and elements
       ; 0-1 of %a in the high half. The low half of %a moves to the high half,
       ; and the checks above expect a zeroing vperm2f128.
 274   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
 275   ret <4 x double> %s
 276 }
 277
 278 define <4 x double> @vperm2z_0x18(<4 x double> %a) {
 279 ; ALL-LABEL: vperm2z_0x18:
 280 ; ALL:       # BB#0:
 281 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 282 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 283 ; ALL-NEXT:    retq
       ; The second operand is a constant whose first two elements are 0.0.
       ; The mask puts those zeros (indices 4-5) in the low half and keeps
       ; elements 2-3 of %a in the high half. Nothing from %a changes position,
       ; and the checks above expect a zeroed register blended with %a
       ; (vxorpd + vblendpd) rather than vperm2f128.
 284   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 285   ret <4 x double> %s
 286 }
 287
 288 define <4 x double> @vperm2z_0x28(<4 x double> %a) {
 289 ; ALL-LABEL: vperm2z_0x28:
 290 ; ALL:       # BB#0:
 291 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 292 ; ALL-NEXT:    retq
       ; Here the zero constant is the first operand and %a is the second.
       ; The mask puts the constant zeros (indices 0-1) in the low half and
       ; elements 0-1 of %a (indices 4-5) in the high half. The checks above
       ; expect a zeroing vperm2f128.
 293   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 294   ret <4 x double> %s
 295 }
 296
 297 define <4 x double> @vperm2z_0x38(<4 x double> %a) {
 298 ; ALL-LABEL: vperm2z_0x38:
 299 ; ALL:       # BB#0:
 300 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 301 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 302 ; ALL-NEXT:    retq
       ; Here the zero constant is the first operand and %a is the second.
       ; The mask puts the constant zeros (indices 0-1) in the low half and
       ; keeps elements 2-3 of %a (indices 6-7) in the high half. Nothing from
       ; %a changes position, and the checks above expect vxorpd + vblendpd
       ; rather than vperm2f128.
 303   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 304   ret <4 x double> %s
 305 }
 306
 307 define <4 x double> @vperm2z_0x80(<4 x double> %a) {
 308 ; ALL-LABEL: vperm2z_0x80:
 309 ; ALL:       # BB#0:
 310 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
 311 ; ALL-NEXT:    retq
       ; The second operand is a constant whose first two elements are 0.0.
       ; The mask keeps elements 0-1 of %a in the low half and puts the
       ; constant zeros (indices 4-5) in the high half. The checks above expect
       ; a zeroing vperm2f128.
 312   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 313   ret <4 x double> %s
 314 }
 315
 316 define <4 x double> @vperm2z_0x81(<4 x double> %a) {
 317 ; ALL-LABEL: vperm2z_0x81:
 318 ; ALL:       # BB#0:
 319 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 320 ; ALL-NEXT:    retq
       ; The second operand is a constant whose first two elements are 0.0.
       ; The mask moves elements 2-3 of %a into the low half and puts the
       ; constant zeros (indices 4-5) in the high half. The checks above expect
       ; a zeroing vperm2f128.
 321   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 322   ret <4 x double> %s
 323 }
 324
 325 define <4 x double> @vperm2z_0x82(<4 x double> %a) {
 326 ; ALL-LABEL: vperm2z_0x82:
 327 ; ALL:       # BB#0:
 328 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
 329 ; ALL-NEXT:    retq
       ; Here the zero constant is the first operand and %a is the second.
       ; The mask keeps elements 0-1 of %a (indices 4-5) in the low half and
       ; puts the constant zeros (indices 0-1) in the high half. The checks
       ; above expect a zeroing vperm2f128.
 330   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
 331   ret <4 x double> %s
 332 }
 333
 334 define <4 x double> @vperm2z_0x83(<4 x double> %a) {
 335 ; ALL-LABEL: vperm2z_0x83:
 336 ; ALL:       # BB#0:
 337 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 338 ; ALL-NEXT:    retq
       ; Here the zero constant is the first operand and %a is the second.
       ; The mask moves elements 2-3 of %a (indices 6-7) into the low half and
       ; puts the constant zeros (indices 0-1) in the high half. The checks
       ; above expect a zeroing vperm2f128.
 339   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
 340   ret <4 x double> %s
 341 }
 342
 343 ;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
 344
 345 define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
 346 ; ALL-LABEL: vperm2z_int_0x83:
 347 ; ALL:       # BB#0:
 348 ; AVX1:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 349 ; AVX2:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
       ; Integer variant: the zero constant is the first operand and %a is the
       ; second. The mask moves elements 2-3 of %a (indices 6-7) into the low
       ; half and puts the constant zeros (indices 0-1) in the high half.
       ; The checks above expect vperm2i128 with +avx2 and vperm2f128 with +avx.
       ; Only the permute is matched; the add and the return are not checked.
 350   %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
       ; Integer add that consumes the shuffle result.
 351   %c = add <4 x i64> %b, %s
 352   ret <4 x i64> %c
 353 }
 354