return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
+ MVT VTElt = VT.getVectorElementType();
+ MVT InVTElt = InVT.getVectorElementType();
SDLoc dl(Op);
+  // SKX processor: a sign extension from an i1 mask can be selected directly
+  // to a mask-to-vector instruction (VPMOVM2B/W/D/Q). The byte/word forms
+  // require AVX512BW and the dword/qword forms AVX512DQ; the 128/256-bit
+  // variants additionally require VLX.
+  if ((InVTElt == MVT::i1) &&
+      ((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+        VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16) ||
+       (Subtarget->hasBWI() && VT.is512BitVector() &&
+        VTElt.getSizeInBits() <= 16) ||
+       (Subtarget->hasDQI() && Subtarget->hasVLX() &&
+        VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32) ||
+       (Subtarget->hasDQI() && VT.is512BitVector() &&
+        VTElt.getSizeInBits() >= 32)))
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
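+  // Otherwise fall through to the existing lowering, which only handles
+  // 8- and 16-element vectors.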
unsigned int NumElts = VT.getVectorNumElements();
+
if (NumElts != 8 && NumElts != 16)
return SDValue();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
- return LowerSIGN_EXTEND_AVX512(Op, DAG);
+ return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
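+// Mask-to-vector conversions: each bit of a mask register is sign-extended
+// into a full vector element, modeled as an X86vsext from the mask register
+// class (VPMOVM2B/W/D/Q on SKX).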
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr> {
+def rr : AVX512XS8I<opc, MRMDestReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+                    !strconcat(OpcodeStr##Vec.Suffix, " \t{$src, $dst|$dst, $src}"),
+                    [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
+}
+
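+// Instantiate the conversion at all three vector widths. The 512-bit form
+// only needs the base predicate; the 256/128-bit forms also require VLX.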
+multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
+ string OpcodeStr, Predicate prd> {
+let Predicates = [prd] in
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
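+// One opcode per element-width pair: 0x28 for the byte/word forms (AVX512BW),
+// 0x38 for the dword/qword forms (AVX512DQ).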
+multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
+ defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
+ HasBWI>;
+ defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
+ HasBWI>, VEX_W;
+ defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
+ HasDQI>;
+ defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
+ HasDQI>, VEX_W;
+}
+
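+// With this, e.g. a sext of <16 x i1> to <16 x i32> on SKX selects
+// "vpmovm2d" (see the test updates below).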
+defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
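+; The SKX run verifies that i1-mask sign extensions now select the new
+; vpmovm2* mask-to-vector instructions.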
; CHECK-LABEL: trunc_16x32_to_16x8
; CHECK: vpmovdb
; CHECK-LABEL: sext_8i1_8i32
; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX: vpmovm2d
; CHECK: ret
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%res = bitcast <16 x i1> %maskv to i16
ret i16 %res
}
+
+; CHECK-LABEL: sext_8i1_8i16
+; SKX: vpmovm2w
+; CHECK: ret
+define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+ %x = icmp slt <8 x i32> %a1, %a2
+ %y = sext <8 x i1> %x to <8 x i16>
+ ret <8 x i16> %y
+}
+
+; CHECK-LABEL: sext_16i1_16i32
+; SKX: vpmovm2d
+; CHECK: ret
+define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
+ %x = icmp slt <16 x i32> %a1, %a2
+ %y = sext <16 x i1> %x to <16 x i32>
+ ret <16 x i32> %y
+}
+
+; CHECK-LABEL: sext_8i1_8i64
+; SKX: vpmovm2q
+; CHECK: ret
+define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+ %x = icmp slt <8 x i32> %a1, %a2
+ %y = sext <8 x i1> %x to <8 x i64>
+ ret <8 x i64> %y
+}