From: Elena Demikhovsky Date: Wed, 11 Dec 2013 14:31:04 +0000 (+0000) Subject: AVX-512: Removed "z" suffix from AVX-512 instructions, since it is incompatible with... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3bf51cf30293272198515bfe315db1dd81d86e63;p=oota-llvm.git AVX-512: Removed "z" suffix from AVX-512 instructions, since it is incompatible with GCC. I moved a test from avx512-vbroadcast-crash.ll to avx512-vbroadcast.ll I defined HasAVX512 predicate as AssemblerPredicate. It means that you should invoke llvm-mc with "-mcpu=knl" to get encoding for AVX-512 instructions. I need this to let AsmMatcher to set different encoding for AVX and AVX-512 instructions that have the same mnemonic and operands (all scalar instructions). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197041 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 62e3630dc82..4c114e2a40c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -207,12 +207,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, EVEX_4V; def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insrtps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; @@ -352,13 +352,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u32u8imm:$src2), - "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX; def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2), - "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, EVEX; @@ -375,13 +375,13 @@ multiclass avx512_fp_broadcast opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),[]>, EVEX; } let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512, + defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512, VR128X, f32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd{z}", VR512, + defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512, VR128X, f64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; } @@ -1230,32 +1230,32 @@ def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG; def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; let isCodeGenOnly = 1 in { def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; } def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>; @@ -1264,12 +1264,12 @@ def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$s // let isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG; def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; } @@ -1277,13 +1277,13 @@ def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$sr // Move Packed Doubleword Int to Packed Double Int // def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; @@ -1291,7 +1291,7 @@ def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), // Move Packed Doubleword Int first element to Doubleword Int // def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, @@ -1299,7 +1299,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>, @@ -1310,12 +1310,12 @@ def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), let isCodeGenOnly = 1 in { def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; } @@ -1324,7 +1324,7 @@ def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), // def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -1352,11 +1352,11 @@ multiclass avx512_move_scalar , XS, EVEX_CD8<32, CD8VT1>; let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem, +defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -1364,12 +1364,12 @@ defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem, let isCodeGenOnly = 1 in { def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG; def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W; } @@ -1518,7 +1518,7 @@ let Predicates = [HasAVX512] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (X86vzmovl (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; @@ -1526,7 +1526,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), let AddedComplexity = 20 in def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, EVEX, VEX_W, @@ -1813,10 +1813,10 @@ defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8 multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { - defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } @@ -2125,12 +2125,12 @@ def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), //===----------------------------------------------------------------------===// def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), - "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))], IIC_SSE_MOV_LH>, EVEX_4V; def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), - "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], IIC_SSE_MOV_LH>, EVEX_4V; @@ -2325,21 +2325,21 @@ multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, } // Constraints = "$src1 = $dst" -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X, +defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X, +defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, +defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X, +defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, +defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X, +defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, +defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, +defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// @@ -2360,13 +2360,13 @@ let neverHasSideEffects = 1 in { } // neverHasSideEffects = 1 } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, +defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, +defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, +defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, +defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), @@ -2387,13 +2387,13 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">, +defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">, +defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">, +defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), @@ -2424,98 +2424,99 @@ multiclass avx512_cvt_s_int opc, RegisterClass SrcRC, RegisterClass DstR let neverHasSideEffects = 1 in { def rr : SI, EVEX, VEX_LIG; + [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG, + Requires<[HasAVX512]>; let mayLoad = 1 in def rm : SI, EVEX, VEX_LIG; + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG, + Requires<[HasAVX512]>; } // neverHasSideEffects = 1 } let Predicates = [HasAVX512] in { // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, - ssmem, sse_load_f32, "cvtss2si{z}">, + ssmem, sse_load_f32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, - ssmem, sse_load_f32, "cvtss2si{z}">, + ssmem, sse_load_f32, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, - ssmem, sse_load_f32, "cvtss2usi{z}">, + ssmem, sse_load_f32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem, - sse_load_f32, "cvtss2usi{z}">, XS, VEX_W, + sse_load_f32, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, - sdmem, sse_load_f64, "cvtsd2si{z}">, + sdmem, sse_load_f64, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, - sdmem, sse_load_f64, "cvtsd2si{z}">, + sdmem, sse_load_f64, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, - sdmem, sse_load_f64, "cvtsd2usi{z}">, + sdmem, sse_load_f64, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem, - sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W, + sse_load_f64, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}", + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, EVEX_4V; defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}", + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}", + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}", + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}", + int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", SSE_CVT_Scalar, 0>, XS, EVEX_4V; defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}", + int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}", + int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}", + int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; // Convert float/double to signed/unsigned int 32/64 with truncation defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si{z}">, + ssmem, sse_load_f32, "cvttss2si">, XS, EVEX_CD8<32, CD8VT1>; defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{z}">, XS, VEX_W, + "cvttss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si{z}">, XD, + sdmem, sse_load_f64, "cvttsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si{z}">, XD, VEX_W, + "cvttsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, - "cvttss2si{z}">, XS, EVEX_CD8<32, CD8VT1>; + "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttss2usi64, ssmem, - sse_load_f32, "cvttss2usi{z}">, XS, VEX_W, + sse_load_f32, "cvttss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttsd2usi, - sdmem, sse_load_f64, "cvttsd2usi{z}">, XD, + sdmem, sse_load_f64, "cvttsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttsd2usi64, sdmem, - sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W, + sse_load_f64, "cvttsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -} multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, @@ -2529,53 +2530,54 @@ multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, } defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, - loadf32, "cvttss2si{z}">, XS, + loadf32, "cvttss2si">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, - loadf32, "cvttss2usi{z}">, XS, + loadf32, "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, - loadf32, "cvttss2si{z}">, XS, VEX_W, + loadf32, "cvttss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, - loadf32, "cvttss2usi{z}">, XS, VEX_W, + loadf32, "cvttss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, - loadf64, "cvttsd2si{z}">, XD, + loadf64, "cvttsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, - loadf64, "cvttsd2usi{z}">, XD, + loadf64, "cvttsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, - loadf64, "cvttsd2si{z}">, XD, VEX_W, + loadf64, "cvttsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, - loadf64, "cvttsd2usi{z}">, XD, VEX_W, + loadf64, "cvttsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +} // HasAVX512 //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), (ins FR32X:$src1, FR32X:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), (ins FR32X:$src1, f32mem:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<32, CD8VT1>; // Convert scalar double to scalar single def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), (ins FR64X:$src1, FR64X:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), (ins FR64X:$src1, f64mem:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; } @@ -2730,17 +2732,17 @@ defm VCVTPS2PHZ : avx512_f16c_ps2ph, TB, EVEX, VEX_LIG, + "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd{z}">, TB, OpSize, EVEX, + "ucomisd">, TB, OpSize, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, - "comiss{z}">, TB, EVEX, VEX_LIG, + "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd{z}">, TB, OpSize, EVEX, + "comisd">, TB, OpSize, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, @@ -2954,12 +2956,12 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SSZr : SI, XS, EVEX_4V; def SSZr_Int : SIi8, XS, EVEX_4V; @@ -2967,12 +2969,12 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; @@ -2980,12 +2982,12 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SDZr : SI, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XD, EVEX_4V, VEX_W; def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; @@ -2993,12 +2995,12 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SDZm : SI, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; def SDZm_Int : SIi8, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6e5d54349fa..cc60acb2b7d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -657,7 +657,8 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; -def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; +def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, + AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index e27600ecd73..223c023a8a4 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ; CHECK-LABEL: addpd512 ; CHECK: vaddpd @@ -196,7 +196,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { } ; CHECK-LABEL: sqrtA -; CHECK: vsqrtssz +; CHECK: vsqrtss {{.*}} encoding: [0x62 ; CHECK: ret declare float @sqrtf(float) readnone define float @sqrtA(float %a) nounwind uwtable readnone ssp { @@ -206,7 +206,7 @@ entry: } ; CHECK-LABEL: sqrtB -; CHECK: vsqrtsdz +; CHECK: vsqrtsd {{.*}}## encoding: [0x62 ; CHECK: ret declare double @sqrt(double) readnone define double @sqrtB(double %a) nounwind uwtable readnone ssp { @@ -216,7 +216,7 @@ entry: } ; CHECK-LABEL: sqrtC -; CHECK: vsqrtssz +; CHECK: vsqrtss {{.*}}## encoding: [0x62 ; CHECK: ret declare float @llvm.sqrt.f32(float) define float @sqrtC(float %a) nounwind { diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index ba52745e6c1..5201e32f574 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s -; CHECK: vucomisdz +; CHECK: vucomisd {{.*}}encoding: [0x62 define double @test1(double %a, double %b) nounwind { %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -13,7 +13,7 @@ l2: ret double %c1 } -; CHECK: vucomissz +; CHECK: vucomiss {{.*}}encoding: [0x62 define float @test2(float %a, float %b) nounwind { %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index ed68ff7bcbd..89a69e7b982 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s ; CHECK-LABEL: sitof32 ; CHECK: vcvtdq2ps %zmm @@ -67,7 +67,7 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind { } ; CHECK-LABEL: funcA -; CHECK: vcvtsi2sdqz (% +; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62 ; CHECK: ret define double @funcA(i64* nocapture %e) { entry: @@ -77,7 +77,7 @@ entry: } ; CHECK-LABEL: funcB -; CHECK: vcvtsi2sdlz (% +; CHECK: vcvtsi2sdl (%{{.*}} encoding: [0x62 ; CHECK: ret define double @funcB(i32* %e) { entry: @@ -87,7 +87,7 @@ entry: } ; CHECK-LABEL: funcC -; CHECK: vcvtsi2sslz (% +; CHECK: vcvtsi2ssl (%{{.*}} encoding: [0x62 ; CHECK: ret define float @funcC(i32* %e) { entry: @@ -97,7 +97,7 @@ entry: } ; CHECK-LABEL: i64tof32 -; CHECK: vcvtsi2ssqz (% +; CHECK: vcvtsi2ssq (%{{.*}} encoding: [0x62 ; CHECK: ret define float @i64tof32(i64* %e) { entry: @@ -107,7 +107,7 @@ entry: } ; CHECK-LABEL: fpext -; CHECK: vcvtss2sdz +; CHECK: vcvtss2sd {{.*}} encoding: [0x62 ; CHECK: ret define void @fpext() { entry: @@ -120,9 +120,9 @@ entry: } ; CHECK-LABEL: fpround_scalar -; CHECK: vmovsdz -; CHECK: vcvtsd2ssz -; CHECK: vmovssz +; CHECK: vmovsd {{.*}} encoding: [0x62 +; CHECK: vcvtsd2ss {{.*}} encoding: [0x62 +; CHECK: vmovss {{.*}} encoding: [0x62 ; CHECK: ret define void @fpround_scalar() nounwind uwtable { entry: @@ -135,7 +135,7 @@ entry: } ; CHECK-LABEL: long_to_double -; CHECK: vmovqz +; CHECK: vmovq {{.*}} encoding: [0x62 ; CHECK: ret define double @long_to_double(i64 %x) { %res = bitcast i64 %x to double @@ -143,7 +143,7 @@ define double @long_to_double(i64 %x) { } ; CHECK-LABEL: double_to_long -; CHECK: vmovqz +; CHECK: vmovq {{.*}} encoding: [0x62 ; CHECK: ret define i64 @double_to_long(double %x) { %res = bitcast double %x to i64 @@ -151,7 +151,7 @@ define i64 @double_to_long(double %x) { } ; CHECK-LABEL: int_to_float -; CHECK: vmovdz +; CHECK: vmovd {{.*}} encoding: [0x62 ; CHECK: ret define float @int_to_float(i32 %x) { %res = bitcast i32 %x to float @@ -159,7 +159,7 @@ define float @int_to_float(i32 %x) { } ; CHECK-LABEL: float_to_int -; CHECK: vmovdz +; CHECK: vmovd {{.*}} encoding: [0x62 ; CHECK: ret define i32 @float_to_int(float %x) { %res = bitcast float %x to i32 @@ -185,7 +185,7 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind { } ; CHECK-LABEL: @fptosi02 -; CHECK vcvttss2siz +; CHECK vcvttss2si {{.*}} encoding: [0x62 ; CHECK: ret define i32 @fptosi02(float %a) nounwind { %b = fptosi float %a to i32 @@ -193,7 +193,7 @@ define i32 @fptosi02(float %a) nounwind { } ; CHECK-LABEL: @fptoui02 -; CHECK vcvttss2usiz +; CHECK vcvttss2usi {{.*}} encoding: [0x62 ; CHECK: ret define i32 @fptoui02(float %a) nounwind { %b = fptoui float %a to i32 diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 3f067401ed3..ef6359b4d9e 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -44,7 +44,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind { } ;CHECK-LABEL: test5: -;CHECK: vextractpsz +;CHECK: vextractps ;CHECK: ret define i32 @test5(<4 x float> %x) nounwind { %ef = extractelement <4 x float> %x, i32 3 @@ -53,7 +53,7 @@ define i32 @test5(<4 x float> %x) nounwind { } ;CHECK-LABEL: test6: -;CHECK: vextractpsz {{.*}}, (%rdi) +;CHECK: vextractps {{.*}}, (%rdi) ;CHECK: ret define void @test6(<4 x float> %x, float* %out) nounwind { %ef = extractelement <4 x float> %x, i32 3 @@ -62,7 +62,7 @@ define void @test6(<4 x float> %x, float* %out) nounwind { } ;CHECK-LABEL: test7 -;CHECK: vmovdz +;CHECK: vmovd ;CHECK: vpermps %zmm ;CHECK: ret define float @test7(<16 x float> %x, i32 %ind) nounwind { @@ -71,7 +71,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind { } ;CHECK-LABEL: test8 -;CHECK: vmovqz +;CHECK: vmovq ;CHECK: vpermpd %zmm ;CHECK: ret define double @test8(<8 x double> %x, i32 %ind) nounwind { @@ -89,7 +89,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind { } ;CHECK-LABEL: test10 -;CHECK: vmovdz +;CHECK: vmovd ;CHECK: vpermd %zmm ;CHEKK: vmovdz %xmm0, %eax ;CHECK: ret diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index cd67cd823e2..883acb404ef 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone ; CHECK-LABEL: test_kortestz @@ -147,42 +147,42 @@ define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: vsqrtssz + ; CHECK: vsqrtss {{.*}}encoding: [0x62 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: vsqrtsdz + ; CHECK: vsqrtsd {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { - ; CHECK: vcvtsd2siz + ; CHECK: vcvtsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; [#uses=1] ret i64 %res } declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtsi2sdqz + ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtusi2sdqz + ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { - ; CHECK: vcvttsd2siz + ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; [#uses=1] ret i64 %res } @@ -190,7 +190,7 @@ declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { - ; CHECK: vcvtss2siz + ; CHECK: vcvtss2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; [#uses=1] ret i64 %res } @@ -198,7 +198,7 @@ declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { - ; CHECK: vcvtsi2ssqz + ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -206,14 +206,14 @@ declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { - ; CHECK: vcvttss2siz + ; CHECK: vcvttss2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; [#uses=1] ret i64 %res } declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { - ; CHECK: vcvtsd2usiz + ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; [#uses=1] ret i64 %res } diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 91242b1cc12..13e68432447 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ; CHECK-LABEL: @test1 -; CHECK: vmovdz %xmm0, %eax +; CHECK: vmovd %xmm0, %eax ## encoding: [0x62 ; CHECK: ret define i32 @test1(float %x) { %res = bitcast float %x to i32 @@ -9,7 +9,7 @@ define i32 @test1(float %x) { } ; CHECK-LABEL: @test2 -; CHECK: vmovdz %edi +; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test2(i32 %x) { %res = insertelement <4 x i32>undef, i32 %x, i32 0 @@ -17,7 +17,7 @@ define <4 x i32> @test2(i32 %x) { } ; CHECK-LABEL: @test3 -; CHECK: vmovqz %rdi +; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x i64> @test3(i64 %x) { %res = insertelement <2 x i64>undef, i64 %x, i32 0 @@ -25,7 +25,7 @@ define <2 x i64> @test3(i64 %x) { } ; CHECK-LABEL: @test4 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test4(i32* %x) { %y = load i32* %x @@ -34,7 +34,7 @@ define <4 x i32> @test4(i32* %x) { } ; CHECK-LABEL: @test5 -; CHECK: vmovssz %xmm0, (%rdi) +; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62 ; CHECK: ret define void @test5(float %x, float* %y) { store float %x, float* %y, align 4 @@ -42,7 +42,7 @@ define void @test5(float %x, float* %y) { } ; CHECK-LABEL: @test6 -; CHECK: vmovsdz %xmm0, (%rdi) +; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62 ; CHECK: ret define void @test6(double %x, double* %y) { store double %x, double* %y, align 8 @@ -50,7 +50,7 @@ define void @test6(double %x, double* %y) { } ; CHECK-LABEL: @test7 -; CHECK: vmovssz (%rdi), %xmm0 +; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define float @test7(i32* %x) { %y = load i32* %x @@ -59,7 +59,7 @@ define float @test7(i32* %x) { } ; CHECK-LABEL: @test8 -; CHECK: vmovdz %xmm0, %eax +; CHECK: vmovd %xmm0, %eax ## encoding: [0x62 ; CHECK: ret define i32 @test8(<4 x i32> %x) { %res = extractelement <4 x i32> %x, i32 0 @@ -67,7 +67,7 @@ define i32 @test8(<4 x i32> %x) { } ; CHECK-LABEL: @test9 -; CHECK: vmovqz %xmm0, %rax +; CHECK: vmovq %xmm0, %rax ## encoding: [0x62 ; CHECK: ret define i64 @test9(<2 x i64> %x) { %res = extractelement <2 x i64> %x, i32 0 @@ -75,7 +75,7 @@ define i64 @test9(<2 x i64> %x) { } ; CHECK-LABEL: @test10 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test10(i32* %x) { %y = load i32* %x, align 4 @@ -84,7 +84,7 @@ define <4 x i32> @test10(i32* %x) { } ; CHECK-LABEL: @test11 -; CHECK: vmovssz (%rdi) +; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x float> @test11(float* %x) { %y = load float* %x, align 4 @@ -93,7 +93,7 @@ define <4 x float> @test11(float* %x) { } ; CHECK-LABEL: @test12 -; CHECK: vmovsdz (%rdi) +; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x double> @test12(double* %x) { %y = load double* %x, align 8 @@ -102,7 +102,7 @@ define <2 x double> @test12(double* %x) { } ; CHECK-LABEL: @test13 -; CHECK: vmovqz %rdi +; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x i64> @test13(i64 %x) { %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 @@ -110,7 +110,7 @@ define <2 x i64> @test13(i64 %x) { } ; CHECK-LABEL: @test14 -; CHECK: vmovdz %edi +; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test14(i32 %x) { %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 @@ -118,7 +118,7 @@ define <4 x i32> @test14(i32 %x) { } ; CHECK-LABEL: @test15 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test15(i32* %x) { %y = load i32* %x, align 4 diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll index e32d62453dc..84a87e23f33 100644 --- a/test/CodeGen/X86/avx512-shuffle.ll +++ b/test/CodeGen/X86/avx512-shuffle.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ; CHECK: LCP ; CHECK: .long 2 ; CHECK: .long 5 @@ -107,7 +107,7 @@ define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind { } ; CHECK-LABEL: test12 -; CHECK: vmovlhpsz %xmm +; CHECK: vmovlhps {{.*}}## encoding: [0x62 ; CHECK: ret define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind { %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -186,7 +186,7 @@ define <16 x float> @test21(<16 x float> %a, <16 x float> %c) { } ; CHECK-LABEL: test22 -; CHECK: vmovhlpsz %xmm +; CHECK: vmovhlps {{.*}}## encoding: [0x62 ; CHECK: ret define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind { %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> diff --git a/test/CodeGen/X86/avx512-vbroadcast-crash.ll b/test/CodeGen/X86/avx512-vbroadcast-crash.ll deleted file mode 100644 index ed617f53606..00000000000 --- a/test/CodeGen/X86/avx512-vbroadcast-crash.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s - -define <16 x i32> @test_vbroadcast() { - ; CHECK: vpbroadcastd -entry: - %0 = sext <16 x i1> zeroinitializer to <16 x i32> - %1 = fcmp uno <16 x float> undef, zeroinitializer - %2 = sext <16 x i1> %1 to <16 x i32> - %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 - ret <16 x i32> %3 -} diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll index 6f89d6ce234..9c6db11d8f4 100644 --- a/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/test/CodeGen/X86/avx512-vbroadcast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ;CHECK-LABEL: _inreg16xi32: ;CHECK: vpbroadcastd {{.*}}, %zmm @@ -19,7 +19,7 @@ define <8 x i64> @_inreg8xi64(i64 %a) { } ;CHECK-LABEL: _inreg16xfloat: -;CHECK: vbroadcastssz {{.*}}, %zmm +;CHECK: vbroadcastss {{.*}}, %zmm ;CHECK: ret define <16 x float> @_inreg16xfloat(float %a) { %b = insertelement <16 x float> undef, float %a, i32 0 @@ -28,7 +28,7 @@ define <16 x float> @_inreg16xfloat(float %a) { } ;CHECK-LABEL: _inreg8xdouble: -;CHECK: vbroadcastsdz {{.*}}, %zmm +;CHECK: vbroadcastsd {{.*}}, %zmm ;CHECK: ret define <8 x double> @_inreg8xdouble(double %a) { %b = insertelement <8 x double> undef, double %a, i32 0 @@ -45,9 +45,20 @@ define <16 x i32> @_xmm16xi32(<16 x i32> %a) { } ;CHECK-LABEL: _xmm16xfloat -;CHECK: vbroadcastssz +;CHECK: vbroadcastss {{.*}}## encoding: [0x62 ;CHECK: ret define <16 x float> @_xmm16xfloat(<16 x float> %a) { %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } + +define <16 x i32> @test_vbroadcast() { + ; CHECK: vpbroadcastd +entry: + %0 = sext <16 x i1> zeroinitializer to <16 x i32> + %1 = fcmp uno <16 x float> undef, zeroinitializer + %2 = sext <16 x i1> %1 to <16 x i32> + %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 + ret <16 x i32> %3 +} +