test/CodeGen/X86/avx-vperm2x128.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
   4
   5 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; swap the two 128-bit halves of %a; both runs expect a single vperm2f128
   6 ; ALL-LABEL: A:
   7 ; ALL:       ## BB#0: ## %entry
   8 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   9 ; ALL-NEXT:    retq
  10 entry:
  11   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> ; every index is below 8, so only %a is read and %b is unused
  12   ret <8 x float> %shuffle
  13 }
  14
  15 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; low half of %a joined with high half of %b; both halves stay in place, and the checks expect a vblendpd
  16 ; ALL-LABEL: B:
  17 ; ALL:       ## BB#0: ## %entry
  18 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
  19 ; ALL-NEXT:    retq
  20 entry:
  21   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> ; indices 12-15 are elements 4-7 of %b
  22   ret <8 x float> %shuffle
  23 }
  24
  25 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; replicate the low 128-bit half of %a into both halves; the checks expect vinsertf128
  26 ; ALL-LABEL: C:
  27 ; ALL:       ## BB#0: ## %entry
  28 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  29 ; ALL-NEXT:    retq
  30 entry:
  31   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ; elements 0-3 of %a appear twice; %b is unused
  32   ret <8 x float> %shuffle
  33 }
  34
  35 define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; replicate the high 128-bit half of %a into both halves; the checks expect a single vperm2f128
  36 ; ALL-LABEL: D:
  37 ; ALL:       ## BB#0: ## %entry
  38 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  39 ; ALL-NEXT:    retq
  40 entry:
  41   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a appear twice; %b is unused
  42   ret <8 x float> %shuffle
  43 }
  44
  45 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { ; byte-vector variant of @D; both runs still expect the floating-point vperm2f128
  46 ; ALL-LABEL: E:
  47 ; ALL:       ## BB#0: ## %entry
  48 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  49 ; ALL-NEXT:    retq
  50 entry:
  51   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; bytes 16-31 of %a (its high half) appear twice; %b is unused
  52   ret <32 x i8> %shuffle
  53 }
  54
  55 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; two-input half permute on i64 elements; both runs expect vperm2f128
  56 ; ALL-LABEL: E2:
  57 ; ALL:       ## BB#0: ## %entry
  58 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  59 ; ALL-NEXT:    retq
  60 entry:
  61   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ; result = high half of %b (indices 6,7) followed by low half of %a
  62   ret <4 x i64> %shuffle
  63 }
  64
  65 define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { ; same shuffle as @E, but fed by an integer add; the +avx2 run expects vperm2i128 where the +avx run expects vperm2f128
  66 ; AVX1-LABEL: Ei:
  67 ; AVX1:       ## BB#0: ## %entry
  68 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
  69 ; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
  70 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  71 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  72 ; AVX1-NEXT:    retq
  73 ;
  74 ; AVX2-LABEL: Ei:
  75 ; AVX2:       ## BB#0: ## %entry
  76 ; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
  77 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
  78 ; AVX2-NEXT:    retq
  79 entry:
  80   ; add forces execution domain
  81   %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  82   %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; only bytes 16-31 of %a2 are used, which is why the +avx sequence adds just the extracted high half
  83   ret <32 x i8> %shuffle
  84 }
  85
  86 define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; same shuffle as @E2, but fed by an integer add; the +avx2 run expects vperm2i128 where the +avx run expects vperm2f128
  87 ; AVX1-LABEL: E2i:
  88 ; AVX1:       ## BB#0: ## %entry
  89 ; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
  90 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  91 ; AVX1-NEXT:    retq
  92 ;
  93 ; AVX2-LABEL: E2i:
  94 ; AVX2:       ## BB#0: ## %entry
  95 ; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
  96 ; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
  97 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
  98 ; AVX2-NEXT:    retq
  99 entry:
 100   ; add forces execution domain
 101   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
 102   %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ; high half of %b, then low half of %a2; only the low half of the add result is used
 103   ret <4 x i64> %shuffle
 104 }
 105
 106 define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp { ; integer-domain half permute with undef mask elements: high half of the add result, then high half of %b
 107 ; AVX1-LABEL: E3i:
 108 ; AVX1:       ## BB#0: ## %entry
 109 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 110 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 111 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 112 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 113 ; AVX1-NEXT:    retq
 114 ;
 115 ; AVX2-LABEL: E3i:
 116 ; AVX2:       ## BB#0: ## %entry
 117 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
 118 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 119 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 120 ; AVX2-NEXT:    retq
 121 entry:
 122   ; add forces execution domain
 123   %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 124   %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15> ; defined low-half indices 5,7 sit in their own slots of %a2's high half; 12-15 are %b's high half
 125   ret <8 x i32> %shuffle
 126 }
 127
 128 define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp { ; low half of %b followed by low half of the add result; checks expect a 128-bit insert (vinsertf128 with +avx, vinserti128 with +avx2)
 129 ; AVX1-LABEL: E4i:
 130 ; AVX1:       ## BB#0: ## %entry
 131 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 132 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 133 ; AVX1-NEXT:    retq
 134 ;
 135 ; AVX2-LABEL: E4i:
 136 ; AVX2:       ## BB#0: ## %entry
 137 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
 138 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 139 ; AVX2-NEXT:    retq
 140 entry:
 141   ; add forces execution domain
 142   %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
 143   %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; indices 16-23 are elements 0-7 of %b; indices 0-7 are the low half of %a2
 144   ret <16 x i16> %shuffle
 145 }
 146
 147 define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readonly ssp { ; memory-operand variant of @E4i; marked readonly rather than readnone because the body loads through both pointer arguments
 148 ; AVX1-LABEL: E5i:
 149 ; AVX1:       ## BB#0: ## %entry
 150 ; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
 151 ; AVX1-NEXT:    vmovaps (%rsi), %ymm1
 152 ; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 153 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 154 ; AVX1-NEXT:    retq
 155 ;
 156 ; AVX2-LABEL: E5i:
 157 ; AVX2:       ## BB#0: ## %entry
 158 ; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
 159 ; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
 160 ; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
 161 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 162 ; AVX2-NEXT:    retq
 163 entry:
 164   %c = load <16 x i16>, <16 x i16>* %a
 165   %d = load <16 x i16>, <16 x i16>* %b
 166   %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; integer add on the first loaded vector, as in @E4i
 167   %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; indices 16-23 are elements 0-7 of %d; indices 0-7 are the low half of %c2
 168   ret <16 x i16> %shuffle
 169 }
 170
 171 ;;;; Cases with undef indices mixed in the mask
 172
 173 define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; partially-undef mask whose defined elements fit (high half of %a, low half of %b); checks expect a single vperm2f128
 174 ; ALL-LABEL: F:
 175 ; ALL:       ## BB#0: ## %entry
 176 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 177 ; ALL-NEXT:    retq
 178 entry:
 179   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11> ; 6,7 are in-place within %a's high half; 9,11 are elements 1,3 of %b, in-place within its low half
 180   ret <8 x float> %shuffle
 181 }
 182
 183 define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; partially-undef mask whose defined elements fit (high half of %a, high half of %a)
 184 ; ALL-LABEL: F2:
 185 ; ALL:       ## BB#0: ## %entry
 186 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 187 ; ALL-NEXT:    retq
 188 entry:
 189   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7> ; the first two elements of each half are undef; %b is unused
 190   ret <8 x float> %shuffle
 191 }
 192
 193 define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; partially-undef mask whose defined elements fit (high half of %a, low half of %b)
 194 ; ALL-LABEL: F3:
 195 ; ALL:       ## BB#0: ## %entry
 196 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 197 ; ALL-NEXT:    retq
 198 entry:
 199   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11> ; 10,11 are elements 2,3 of %b, in-place within its low half
 200   ret <8 x float> %shuffle
 201 }
 202
 203 define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; partially-undef mask whose defined elements fit (high half of %a, high half of %b)
 204 ; ALL-LABEL: F4:
 205 ; ALL:       ## BB#0: ## %entry
 206 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 207 ; ALL-NEXT:    retq
 208 entry:
 209   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15> ; 14,15 are elements 6,7 of %b, in-place within its high half
 210   ret <8 x float> %shuffle
 211 }
 212
 213 define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; undef elements only in the low result half; defined elements fit (high half of %a, high half of %a)
 214 ; ALL-LABEL: F5:
 215 ; ALL:       ## BB#0: ## %entry
 216 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 217 ; ALL-NEXT:    retq
 218 entry:
 219   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> ; the high result half is fully specified as elements 4-7 of %a
 220   ret <8 x float> %shuffle
 221 }
 222
 223 define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; undef elements only in the low result half; defined elements fit (high half of %a, low half of %b)
 224 ; ALL-LABEL: F6:
 225 ; ALL:       ## BB#0: ## %entry
 226 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
 227 ; ALL-NEXT:    retq
 228 entry:
 229   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> ; the high result half is fully specified as elements 0-3 of %b
 230   ret <8 x float> %shuffle
 231 }
 232
 233 define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; undef elements only in the high result half; defined elements fit (high half of %a, high half of %a)
 234 ; ALL-LABEL: F7:
 235 ; ALL:       ## BB#0: ## %entry
 236 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 237 ; ALL-NEXT:    retq
 238 entry:
 239   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7> ; the low result half is fully specified as elements 4-7 of %a
 240   ret <8 x float> %shuffle
 241 }
 242
 243 define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; undef elements only in the high result half; defined elements fit (high half of %a, high half of %b)
 244 ; ALL-LABEL: F8:
 245 ; ALL:       ## BB#0: ## %entry
 246 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 247 ; ALL-NEXT:    retq
 248 entry:
 249   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15> ; 14,15 are elements 6,7 of %b, in-place within its high half
 250   ret <8 x float> %shuffle
 251 }
 252
 253 ;;;; Cases where we must not select vperm2f128 on its own
 254
 255 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; mask that a whole-half permute cannot satisfy by itself; checks expect vperm2f128 followed by an in-lane vpermilps fix-up
 256 ; ALL-LABEL: G:
 257 ; ALL:       ## BB#0: ## %entry
 258 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
 259 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
 260 ; ALL-NEXT:    retq
 261 entry:
 262   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15> ; index 12 (first element of %b's high half) lands in the second slot of the result's high half, so elements move within the half
 263   ret <8 x float> %shuffle
 264 }
 265
 266 ;; Test zero mask generation.
 267 ;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
 268 ;; Prefer xor+vblendpd over vperm2f128 when the shuffle can be done as a blend, because the xor+blend sequence has better performance.
 269
 270 define <4 x double> @vperm2z_0x08(<4 x double> %a) { ; result = (zero half, low half of %a); the half of %a moves, and the checks expect vperm2f128 with a zeroed half
 271 ; ALL-LABEL: vperm2z_0x08:
 272 ; ALL:       ## BB#0:
 273 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 274 ; ALL-NEXT:    retq
 275   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1> ; indices 4,5 pick the two 0.0 elements of the constant operand
 276   ret <4 x double> %s
 277 }
 278
 279 define <4 x double> @vperm2z_0x18(<4 x double> %a) { ; result = (zero half, high half of %a); the half of %a stays in place, and the checks expect vxorpd + vblendpd
 280 ; ALL-LABEL: vperm2z_0x18:
 281 ; ALL:       ## BB#0:
 282 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 283 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 284 ; ALL-NEXT:    retq
 285   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3> ; indices 4,5 pick the two 0.0 elements; 2,3 keep %a's high half where it is
 286   ret <4 x double> %s
 287 }
 288
 289 define <4 x double> @vperm2z_0x28(<4 x double> %a) { ; same result as @vperm2z_0x08 (zero half, low half of %a) but with the shuffle operands swapped
 290 ; ALL-LABEL: vperm2z_0x28:
 291 ; ALL:       ## BB#0:
 292 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
 293 ; ALL-NEXT:    retq
 294   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; the constant is the first operand here, so 0,1 are the zeros and 4,5 are elements 0,1 of %a
 295   ret <4 x double> %s
 296 }
 297
 298 define <4 x double> @vperm2z_0x38(<4 x double> %a) { ; same result as @vperm2z_0x18 (zero half, high half of %a) but with the shuffle operands swapped; checks expect vxorpd + vblendpd
 299 ; ALL-LABEL: vperm2z_0x38:
 300 ; ALL:       ## BB#0:
 301 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 302 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
 303 ; ALL-NEXT:    retq
 304   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; 0,1 are the zeros of the constant; 6,7 are elements 2,3 of %a, kept in the high half
 305   ret <4 x double> %s
 306 }
 307
 308 define <4 x double> @vperm2z_0x80(<4 x double> %a) { ; result = (low half of %a, zero half); the half of %a stays in place, and the checks expect vxorpd + vblendpd
 309 ; ALL-LABEL: vperm2z_0x80:
 310 ; ALL:       ## BB#0:
 311 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 312 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
 313 ; ALL-NEXT:    retq
 314   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; 0,1 keep %a's low half where it is; 4,5 pick the two 0.0 elements
 315   ret <4 x double> %s
 316 }
 317
 318 define <4 x double> @vperm2z_0x81(<4 x double> %a) { ; result = (high half of %a, zero half); the half of %a moves, and the checks expect vperm2f128 with a zeroed half
 319 ; ALL-LABEL: vperm2z_0x81:
 320 ; ALL:       ## BB#0:
 321 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 322 ; ALL-NEXT:    retq
 323   %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5> ; 2,3 move %a's high half into the low half; 4,5 pick the two 0.0 elements
 324   ret <4 x double> %s
 325 }
 326
 327 define <4 x double> @vperm2z_0x82(<4 x double> %a) { ; same result as @vperm2z_0x80 (low half of %a, zero half) but with the shuffle operands swapped; checks expect vxorpd + vblendpd
 328 ; ALL-LABEL: vperm2z_0x82:
 329 ; ALL:       ## BB#0:
 330 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 331 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
 332 ; ALL-NEXT:    retq
 333   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1> ; 4,5 are elements 0,1 of %a, kept in the low half; 0,1 are the zeros of the constant
 334   ret <4 x double> %s
 335 }
 336
 337 define <4 x double> @vperm2z_0x83(<4 x double> %a) { ; same result as @vperm2z_0x81 (high half of %a, zero half) but with the shuffle operands swapped
 338 ; ALL-LABEL: vperm2z_0x83:
 339 ; ALL:       ## BB#0:
 340 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 341 ; ALL-NEXT:    retq
 342   %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ; 6,7 are elements 2,3 of %a, moved into the low half; 0,1 are the zeros of the constant
 343   ret <4 x double> %s
 344 }
 345
 346 ;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
 347
 348 define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) { ; i64 version of @vperm2z_0x83 whose result feeds an integer add; the +avx2 run expects vperm2i128 where the +avx run expects vperm2f128
 349 ; AVX1-LABEL: vperm2z_int_0x83:
 350 ; AVX1:       ## BB#0:
 351 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 352 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 353 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 354 ; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
 355 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
 356 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 357 ; AVX1-NEXT:    retq
 358 ;
 359 ; AVX2-LABEL: vperm2z_int_0x83:
 360 ; AVX2:       ## BB#0:
 361 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 362 ; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
 363 ; AVX2-NEXT:    retq
 364   %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> ; 6,7 are elements 2,3 of %a, moved into the low half; 0,1 are the zeros of the constant
 365   %c = add <4 x i64> %b, %s ; integer use of the shuffle result; in the +avx checks this add is done as two 128-bit vpaddq halves
 366   ret <4 x i64> %c
 367 }
 368