From: Igor Breger Date: Wed, 17 Jun 2015 07:23:57 +0000 (+0000) Subject: AVX-512: cvtusi2ss/d intrinsics. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a06697060521b6de35444ab6359825ac60d72b07;p=oota-llvm.git AVX-512: cvtusi2ss/d intrinsics. Change builtin function name and signature ( add third parameter - rounding mode ). Added tests for intrinsics. Differential Revision: http://reviews.llvm.org/D10473 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239888 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index ece02751180..f4e03d579fc 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3097,12 +3097,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">, + def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i64_ty], [IntrNoMem]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; @@ -3112,12 +3112,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">, + def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">, + def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i64_ty], [IntrNoMem]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, @@ -3131,7 +3131,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - } // Pack ops. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index a9ccdb5ca8b..68c18fd5ca7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4167,25 +4167,45 @@ defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let hasSideEffects = 0 in { - def rr : SI opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + let hasSideEffects = 0 in { + def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // hasSideEffects = 0 + } // hasSideEffects = 0 + let isCodeGenOnly = 1 in { + def rr_Int : SI, EVEX_4V; + + def rm_Int : SI, EVEX_4V; + }//isCodeGenOnly = 1 } multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> { + X86VectorVTInfo DstVT, string asm> { def rrb_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, } multiclass avx512_vcvtsi_common opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> { - defm NAME : avx512_vcvtsi_round, - avx512_vcvtsi, VEX_LIG; + X86VectorVTInfo DstVT, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, + VEX_LIG; } let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, - v4f32x_info, i32mem, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; + v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, + XS, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, - v4f32x_info, i64mem, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; + v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, - v2f64x_info, i32mem, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; + v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, + XD, EVEX_CD8<32, CD8VT1>; defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, - v2f64x_info, i64mem, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; + v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4227,13 +4253,17 @@ def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32, - v4f32x_info, i32mem, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; + v4f32x_info, i32mem, loadi32, + "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, - v4f32x_info, i64mem, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; + v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, + XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info, + i32mem, loadi32, "cvtusi2sd{l}">, + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, - v2f64x_info, i64mem, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; + v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4314,18 +4344,9 @@ let isCodeGenOnly = 1 in { int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; - defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; - defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; - defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; } // isCodeGenOnly = 1 // Convert float/double to signed/unsigned int 32/64 with truncation diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 4937284b6bb..6e1322dbc7f 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -242,10 +242,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), - X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), - X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 100824e59af..63eee2f7153 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2022,6 +2022,8 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *CopyOp, *ConvertOp; switch (I.getNumArgOperands()) { + case 3: + assert(isa(I.getArgOperand(2)) && "Invalid rounding mode"); case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index cd53770e359..544b3a0b0cb 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -176,13 +176,6 @@ define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { } declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone -define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 - %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone - define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; [#uses=1] @@ -2824,6 +2817,103 @@ define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { } declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %b = load i32, i32* %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr) +; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %b = load i32, i32* %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone + +define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) +; CHECK-LABEL: _mm_cvt_roundu64_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) +; CHECK-LABEL: _mm_cvtu64_ss: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) +; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) +; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} + +define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) +; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq +{ + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone + define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1] %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,