(memopv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_valign<string Suffix, RegisterClass RC,
- X86MemOperand x86memop, ValueType IntVT,
- ValueType FloatVT> {
+multiclass avx512_valign<string Suffix, RegisterClass RC, RegisterClass KRC,
+ RegisterClass MRC, X86MemOperand x86memop,
+ ValueType IntVT, ValueType FloatVT> {
def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat("valign"##Suffix,
(IntVT (X86VAlign RC:$src2, RC:$src1,
(i8 imm:$src3))))]>, EVEX_4V;
+ let Constraints = "$src0 = $dst", AddedComplexity=30 in
+ def rrik : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat("valign"##Suffix,
+ " \t{$src3, $src2, $src1, $mask, $dst|"
+ "$dst, $mask, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntVT (vselect KRC:$mask,
+ (X86VAlign RC:$src2, RC:$src1,
+ (i8 imm:$src3)),
+ RC:$src0)))]>,
+ EVEX_4V, EVEX_K;
+
// Also match valign of packed floats.
def : Pat<(FloatVT (X86VAlign RC:$src1, RC:$src2, (i8 imm:$imm))),
(!cast<Instruction>(NAME##rri) RC:$src2, RC:$src1, imm:$imm)>;
+ // Non-masking intrinsic call.
+ def : Pat<(IntVT
+ (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+ RC:$src1, RC:$src2, imm:$src3,
+ (IntVT (bitconvert (v16i32 immAllZerosV))), -1)),
+ (!cast<Instruction>(NAME#rri) RC:$src1, RC:$src2, imm:$src3)>;
+
+ // Masking intrinsic call.
+ def : Pat<(IntVT
+ (!cast<Intrinsic>("int_x86_avx512_mask_valign_"##Suffix##"_512")
+ RC:$src1, RC:$src2, imm:$src3,
+ RC:$src4, MRC:$mask)),
+ (!cast<Instruction>(NAME#rrik) RC:$src4,
+ (COPY_TO_REGCLASS MRC:$mask, KRC), RC:$src1,
+ RC:$src2, imm:$src3)>;
+
let mayLoad = 1 in
def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3),
"$dst, $src1, $src2, $src3}"),
[]>, EVEX_4V;
}
-defm VALIGND : avx512_valign<"d", VR512, i512mem, v16i32, v16f32>,
+defm VALIGND : avx512_valign<"d", VR512, VK16WM, GR16, i512mem, v16i32, v16f32>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_valign<"q", VR512, i512mem, v8i64, v8f64>,
+defm VALIGNQ : avx512_valign<"q", VR512, VK8WM, GR8, i512mem, v8i64, v8f64>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Helper fragments to match sext vXi1 to vXiY.
}
declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
+
+define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
+; CHECK-LABEL: test_mask_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %k1, %zmm2
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
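
The multiclass is instantiated for both element widths, but the tests above only exercise the q form. For illustration, an analogous d-suffix test might look like the sketch below; the exact signature of @llvm.x86.avx512.mask.valign.d.512 (i8 immediate, i16 mask) and the CHECK line are assumptions inferred from the q variant and the VALIGND defm, not part of this patch.

; Sketch only, not part of this patch: assumes the d-suffix intrinsic mirrors
; the q variant, with an i8 immediate and an i16 mask.
define <16 x i32> @test_valign_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_valign_d:
; CHECK: valignd $5, %zmm1, %zmm0, %zmm0
  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)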