From: Elena Demikhovsky Date: Sun, 5 Oct 2014 14:11:08 +0000 (+0000) Subject: AVX-512-SKX: Added instruction VPMOVM2B/W/D/Q. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a0cb2c75b018c40e722667e28b1507fb0fe95810;p=oota-llvm.git AVX-512-SKX: Added instruction VPMOVM2B/W/D/Q. This instruction allows broadcasting a mask vector to a data vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219083 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 64e0db1e70f..f57da0a1d6f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15119,13 +15119,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops); } -static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); + MVT VTElt = VT.getVectorElementType(); + MVT InVTElt = InVT.getVectorElementType(); SDLoc dl(Op); + // SKX processor + if ((InVTElt == MVT::i1) && + (((Subtarget->hasBWI() && Subtarget->hasVLX() && + VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) || + + ((Subtarget->hasBWI() && VT.is512BitVector() && + VTElt.getSizeInBits() <= 16)) || + + ((Subtarget->hasDQI() && Subtarget->hasVLX() && + VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) || + + ((Subtarget->hasDQI() && VT.is512BitVector() && + VTElt.getSizeInBits() >= 32)))) + return DAG.getNode(X86ISD::VSEXT, dl, VT, In); + unsigned int NumElts = VT.getVectorNumElements(); + if (NumElts != 8 && NumElts != 16) return SDValue(); @@ -15158,7 +15177,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SDLoc dl(Op); if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) - 
return LowerSIGN_EXTEND_AVX512(Op, DAG); + return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG); if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && (VT != MVT::v8i32 || InVT != MVT::v8i16) && diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 10055d065dc..b205de058a6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5001,3 +5001,32 @@ def truncstorei1 : PatFrag<(ops node:$val, node:$ptr), def : Pat<(truncstorei1 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>; +multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { +def rr : AVX512XS8I, EVEX; +} + +multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, + string OpcodeStr, Predicate prd> { +let Predicates = [prd] in + defm Z : cvt_by_vec_width, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : cvt_by_vec_width, EVEX_V256; + defm Z128 : cvt_by_vec_width, EVEX_V128; + } +} + +multiclass avx512_convert_mask_to_vector { + defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr, + HasBWI>; + defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr, + HasBWI>, VEX_W; + defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr, + HasDQI>; + defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr, + HasDQI>, VEX_W; +} + +defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll index f1b639e110f..91ef5d58f43 100644 --- a/test/CodeGen/X86/avx512-trunc-ext.ll +++ b/test/CodeGen/X86/avx512-trunc-ext.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s ; CHECK-LABEL: trunc_16x32_to_16x8 ; CHECK: vpmovdb @@ -118,6 +119,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; CHECK-LABEL: sext_8i1_8i32 ; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 
{%k1} {z} +; SKX: vpmovm2d ; CHECK: ret define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { %x = icmp slt <8 x i32> %a1, %a2 @@ -145,3 +147,30 @@ define i16 @trunc_i32_to_i1(i32 %a) { %res = bitcast <16 x i1> %maskv to i16 ret i16 %res } + +; CHECK-LABEL: sext_8i1_8i16 +; SKX: vpmovm2w +; CHECK: ret +define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %y = sext <8 x i1> %x to <8 x i16> + ret <8 x i16> %y +} + +; CHECK-LABEL: sext_16i1_16i32 +; SKX: vpmovm2d +; CHECK: ret +define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { + %x = icmp slt <16 x i32> %a1, %a2 + %y = sext <16 x i1> %x to <16 x i32> + ret <16 x i32> %y +} + +; CHECK-LABEL: sext_8i1_8i64 +; SKX: vpmovm2q +; CHECK: ret +define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %y = sext <8 x i1> %x to <8 x i64> + ret <8 x i64> %y +}