CASE_AVX_INS_COMMON(Inst, Y, r##src) \
CASE_SSE_INS_COMMON(Inst, r##src) \
+#define CASE_UNPCK(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
+ CASE_AVX_INS_COMMON(Inst, , r##src) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src) \
+ CASE_SSE_INS_COMMON(Inst, r##src) \
+
#define CASE_VSHUF(Inst, src) \
CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
DecodePSWAPMask(MVT::v2i32, ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKHBW, r)
case X86::MMX_PUNPCKHBWirr:
- case X86::PUNPCKHBWrr:
- case X86::VPUNPCKHBWrr:
- case X86::VPUNPCKHBWYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKHBW, m)
case X86::MMX_PUNPCKHBWirm:
- case X86::PUNPCKHBWrm:
- case X86::VPUNPCKHBWrm:
- case X86::VPUNPCKHBWYrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKHWD, r)
case X86::MMX_PUNPCKHWDirr:
- case X86::PUNPCKHWDrr:
- case X86::VPUNPCKHWDrr:
- case X86::VPUNPCKHWDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKHWD, m)
case X86::MMX_PUNPCKHWDirm:
- case X86::PUNPCKHWDrm:
- case X86::VPUNPCKHWDrm:
- case X86::VPUNPCKHWDYrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKHDQ, r)
case X86::MMX_PUNPCKHDQirr:
- case X86::PUNPCKHDQrr:
- case X86::VPUNPCKHDQrr:
- case X86::VPUNPCKHDQYrr:
- case X86::VPUNPCKHDQZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKHDQ, m)
case X86::MMX_PUNPCKHDQirm:
- case X86::PUNPCKHDQrm:
- case X86::VPUNPCKHDQrm:
- case X86::VPUNPCKHDQYrm:
- case X86::VPUNPCKHDQZrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::PUNPCKHQDQrr:
- case X86::VPUNPCKHQDQrr:
- case X86::VPUNPCKHQDQYrr:
- case X86::VPUNPCKHQDQZrr:
+ CASE_UNPCK(PUNPCKHQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- case X86::VPUNPCKHQDQrm:
- case X86::VPUNPCKHQDQYrm:
- case X86::VPUNPCKHQDQZrm:
+ CASE_UNPCK(PUNPCKHQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKLBW, r)
case X86::MMX_PUNPCKLBWirr:
- case X86::PUNPCKLBWrr:
- case X86::VPUNPCKLBWrr:
- case X86::VPUNPCKLBWYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKLBW, m)
case X86::MMX_PUNPCKLBWirm:
- case X86::PUNPCKLBWrm:
- case X86::VPUNPCKLBWrm:
- case X86::VPUNPCKLBWYrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKLWD, r)
case X86::MMX_PUNPCKLWDirr:
- case X86::PUNPCKLWDrr:
- case X86::VPUNPCKLWDrr:
- case X86::VPUNPCKLWDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKLWD, m)
case X86::MMX_PUNPCKLWDirm:
- case X86::PUNPCKLWDrm:
- case X86::VPUNPCKLWDrm:
- case X86::VPUNPCKLWDYrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
+ CASE_UNPCK(PUNPCKLDQ, r)
case X86::MMX_PUNPCKLDQirr:
- case X86::PUNPCKLDQrr:
- case X86::VPUNPCKLDQrr:
- case X86::VPUNPCKLDQYrr:
- case X86::VPUNPCKLDQZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ CASE_UNPCK(PUNPCKLDQ, m)
case X86::MMX_PUNPCKLDQirm:
- case X86::PUNPCKLDQrm:
- case X86::VPUNPCKLDQrm:
- case X86::VPUNPCKLDQYrm:
- case X86::VPUNPCKLDQZrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::PUNPCKLQDQrr:
- case X86::VPUNPCKLQDQrr:
- case X86::VPUNPCKLQDQYrr:
- case X86::VPUNPCKLQDQZrr:
+ CASE_UNPCK(PUNPCKLQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- case X86::VPUNPCKLQDQrm:
- case X86::VPUNPCKLQDQYrm:
- case X86::VPUNPCKLQDQZrm:
+ CASE_UNPCK(PUNPCKLQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
}
- case X86::UNPCKLPDrr:
- case X86::VUNPCKLPDrr:
- case X86::VUNPCKLPDYrr:
- case X86::VUNPCKLPDZrr:
+ CASE_UNPCK(UNPCKLPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKLPDrm:
- case X86::VUNPCKLPDrm:
- case X86::VUNPCKLPDYrm:
- case X86::VUNPCKLPDZrm:
+ CASE_UNPCK(UNPCKLPD, m)
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKLPSrr:
- case X86::VUNPCKLPSrr:
- case X86::VUNPCKLPSYrr:
- case X86::VUNPCKLPSZrr:
+ CASE_UNPCK(UNPCKLPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKLPSrm:
- case X86::VUNPCKLPSrm:
- case X86::VUNPCKLPSYrm:
- case X86::VUNPCKLPSZrm:
+ CASE_UNPCK(UNPCKLPS, m)
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKHPDrr:
- case X86::VUNPCKHPDrr:
- case X86::VUNPCKHPDYrr:
- case X86::VUNPCKHPDZrr:
+ CASE_UNPCK(UNPCKHPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKHPDrm:
- case X86::VUNPCKHPDrm:
- case X86::VUNPCKHPDYrm:
- case X86::VUNPCKHPDZrm:
+ CASE_UNPCK(UNPCKHPD, m)
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKHPSrr:
- case X86::VUNPCKHPSrr:
- case X86::VUNPCKHPSYrr:
- case X86::VUNPCKHPSZrr:
+ CASE_UNPCK(UNPCKHPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKHPSrm:
- case X86::VUNPCKHPSrm:
- case X86::VUNPCKHPSYrm:
- case X86::VUNPCKHPSZrm:
+ CASE_UNPCK(UNPCKHPS, m)
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpunpcklqdq {{.*#+}}
-; CHECK: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
+; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z
+; CHECK-NEXT: ## zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
+; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpunpckhqdq {{.*#+}}
-; CHECK: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
+; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpunpckhdq {{.*#+}}
-; CHECK: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
+; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpunpckldq {{.*#+}}
-; CHECK: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
+; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[8],k1[8],xmm2[9],k1[9],xmm2[10],k1[10],xmm2[11],k1[11],xmm2[12],k1[12],xmm2[13],k1[13],xmm2[14],k1[14],xmm2[15],k1[15]
; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
%res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
%res2 = add <16 x i8> %res, %res1
define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3],xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
%res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
%res2 = add <16 x i8> %res, %res1
define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15],ymm2[24],k1[24],ymm2[25],k1[25],ymm2[26],k1[26],ymm2[27],k1[27],ymm2[28],k1[28],ymm2[29],k1[29],ymm2[30],k1[30],ymm2[31],k1[31]
; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
%res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
%res2 = add <32 x i8> %res, %res1
define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[16],k1[16],ymm2[17],k1[17],ymm2[18],k1[18],ymm2[19],k1[19],ymm2[20],k1[20],ymm2[21],k1[21],ymm2[22],k1[22],ymm2[23],k1[23]
; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
%res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
%res2 = add <32 x i8> %res, %res1
define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
%res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
%res2 = add <8 x i16> %res, %res1
define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
%res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
%res2 = add <8 x i16> %res, %res1
define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11]
; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
%res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
%res2 = add <16 x i16> %res, %res1
define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15]
; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
%res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
%res2 = add <16 x i16> %res, %res1
define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
%res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
%res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
%res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK: ## BB#0:
; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
%res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
%res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
%res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
%res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
%res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
%res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
%res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
%res2 = add <4 x i32> %res, %res1
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; CHECK: ## BB#0:
; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
%res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
%res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
%res2 = add <8 x i32> %res, %res1
define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
%res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
%res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
%res2 = add <2 x i64> %res, %res1
define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
%res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res2 = add <4 x i64> %res, %res1
define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
%res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
%res2 = add <4 x i64> %res, %res1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_02:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_02:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_02:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_02:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_02_copy:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_02_copy:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_02_copy:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_02_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
}
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_13:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_13:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_13_copy:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_13_copy:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_13_copy:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhqdq %xmm2, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_13_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_20:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_20:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_20:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_20:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_20_copy:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_20_copy:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_20_copy:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_20_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_31:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_31:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_31:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhqdq %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_31:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
}
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: shuffle_v2i64_31_copy:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v2i64_31_copy:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v2i64_31_copy:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhqdq %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: shuffle_v2i64_31_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
}
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: insert_reg_hi_v2i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovq %rdi, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_reg_hi_v2i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovq %rdi, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: insert_reg_hi_v2i64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovq %rdi, %xmm1
-; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: insert_reg_hi_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %rdi, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
; AVX512VL-LABEL: insert_mem_hi_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq (%rdi), %xmm1
-; AVX512VL-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: insert_reg_hi_v2f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_reg_hi_v2f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: insert_reg_hi_v2f64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: insert_reg_hi_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0426:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_0426:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0426:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0426:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1537:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1537:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1537:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1537:
+; ALL: # BB#0:
+; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_4062:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_4062:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_4062:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpcklpd %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_4062:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_5173:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_5173:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_5173:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpckhpd %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_5173:
+; ALL: # BB#0:
+; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
ret <4 x double> %shuffle
}
}
define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_u062:
-; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_u062:
-; AVX2: # BB#0:
-; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_u062:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vunpcklpd %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_u062:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
ret <4 x double> %shuffle
}
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %ymm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
ret <4 x i64> %shuffle
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
%bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>