CASE_AVX_INS_COMMON(Inst, Y, r##src) \
CASE_SSE_INS_COMMON(Inst, r##src) \
+// CASE_SHUF(Inst, src): case-label list for a shuffle instruction whose
+// opcode suffix has the form r<src>i, e.g. CASE_SHUF(SHUFPD, r) covers
+// SHUFPDrri, VSHUFPDrri and VSHUFPDYrri (among others) and
+// CASE_SHUF(SHUFPD, m) covers the matching ...rmi opcodes.
+// The expansion chains CASE_MASK_INS_COMMON for the Z, Z256 and Z128
+// suffixes, CASE_AVX_INS_COMMON for the unsuffixed and Y forms, and
+// CASE_SSE_INS_COMMON for the SSE form.
+// NOTE(review): the exact labels produced depend on those helper macros,
+// which are defined elsewhere -- presumably the plain, merge-masked and
+// zero-masked opcodes for each AVX-512 width; confirm against them.
+// The last expansion line ends in a continuation backslash, so the blank
+// line that follows is what terminates the macro -- keep it.
+#define CASE_SHUF(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, , r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \
+ CASE_SSE_INS_COMMON(Inst, r##src##i) \
+
+// CASE_VPERM(Inst, src): case-label list for a single-source permute with
+// an immediate, whose opcode suffix is <src>i (no leading 'r'), e.g.
+// CASE_VPERM(PERMILPS, r) covers VPERMILPSri and VPERMILPSYri (among
+// others) and CASE_VPERM(PERMILPS, m) covers the matching ...mi opcodes.
+// Unlike CASE_SHUF there is no CASE_SSE_INS_COMMON entry: only the
+// Z/Z256/Z128 CASE_MASK_INS_COMMON groups and the unsuffixed/Y
+// CASE_AVX_INS_COMMON groups are listed.
+// NOTE(review): Inst is passed without the 'V' prefix (PERMILPS, PERMILPD),
+// so the helper macros presumably prepend it -- confirm against their
+// definitions, which are not visible here.
+// The last expansion line ends in a continuation backslash, so the blank
+// line that follows is what terminates the macro -- keep it.
+#define CASE_VPERM(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, src##i) \
+ CASE_AVX_INS_COMMON(Inst, , src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, src##i) \
+
#define CASE_VSHUF(Inst, src) \
CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
- case X86::SHUFPDrri:
- case X86::VSHUFPDrri:
- case X86::VSHUFPDYrri:
+ CASE_SHUF(SHUFPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPDrmi:
- case X86::VSHUFPDrmi:
- case X86::VSHUFPDYrmi:
+ CASE_SHUF(SHUFPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::SHUFPSrri:
- case X86::VSHUFPSrri:
- case X86::VSHUFPSYrri:
+ CASE_SHUF(SHUFPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPSrmi:
- case X86::VSHUFPSrmi:
- case X86::VSHUFPSYrmi:
+ CASE_SHUF(SHUFPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPSri:
- case X86::VPERMILPSYri:
+ CASE_VPERM(PERMILPS, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPSmi:
- case X86::VPERMILPSYmi:
+ CASE_VPERM(PERMILPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPDri:
- case X86::VPERMILPDYri:
+ CASE_VPERM(PERMILPD, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPDmi:
- case X86::VPERMILPDYmi:
+ CASE_VPERM(PERMILPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: ## zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: ## zmm1 = zmm1[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: ## zmm2 = k1[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
-; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: ## zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: ## zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1]
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1]
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0
-; CHECK: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2]
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
-; CHECK: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0]
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
-; CHECK: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4]
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
-; CHECK: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2]
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = k1[0,1,3,2]
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm1[1,0]
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = k1[1,0]
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[1,0]
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0]
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = k1[2,1,1,0]
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_10:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_10:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_10:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_10:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2f64_32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2f64_32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2f64_32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2f64_32:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
ret <2 x double> %shuffle
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
%bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_mem_v2f64_10:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_mem_v2f64_10:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_mem_v2f64_10:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_mem_v2f64_10:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
+; AVX-NEXT: retq
%a = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
}
define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0023:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_0023:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0023:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $8, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0023:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1032:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1032:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1032:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1032:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1133:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1133:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1133:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $15, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1133:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1023:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1023:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1023:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1023:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1022:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1022:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1022:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilpd $1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1022:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
ret <4 x double> %shuffle
}
}
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_5163:
-; AVX1: # BB#0:
-; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_5163:
-; AVX2: # BB#0:
-; AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_5163:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufpd $11, %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_5163:
+; ALL: # BB#0:
+; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
ret <4 x double> %shuffle
}
; AVX512VL-LABEL: shuffle_v4f64_1054:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
ret <4 x double> %shuffle
; AVX512VL-LABEL: shuffle_v4f64_3254:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x double> %shuffle
; AVX512VL-LABEL: shuffle_v4f64_3276:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1076:
-; AVX1: # BB#0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1076:
-; AVX2: # BB#0:
-; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1076:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd $5, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1076:
+; ALL: # BB#0:
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
ret <4 x double> %shuffle
}