X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=include%2Fllvm%2FIR%2FIntrinsicsX86.td;h=87c7c870033f72f87336bdbb3888278a00735a2d;hb=67a38a848e83dfe7038935d7d2b4209b5f3279d8;hp=c7a68daf025527c944a89c847067126fcc472827;hpb=c892bd6a74ba92b5631c89cfac8961a8005b644a;p=oota-llvm.git diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index c7a68daf025..87c7c870033 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -17,6 +17,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_int : Intrinsic<[], [llvm_i8_ty]>; } +//===----------------------------------------------------------------------===// +// Read Time Stamp Counter. +let TargetPrefix = "x86" in { + def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">, + Intrinsic<[llvm_i64_ty], [], []>; + def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; +} + +// Read Performance-Monitoring Counter. +let TargetPrefix = "x86" in { + def int_x86_rdpmc : GCCBuiltin<"__builtin_ia32_rdpmc">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; +} + //===----------------------------------------------------------------------===// // 3DNow! @@ -206,6 +221,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">, Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">, @@ -535,6 +551,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [], []>; def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">, Intrinsic<[], [], []>; + def int_x86_sse2_pause : GCCBuiltin<"__builtin_ia32_pause">, + Intrinsic<[], [], []>; } //===----------------------------------------------------------------------===// @@ -655,6 +673,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_pshuf_d : GCCBuiltin<"__builtin_ia32_pshufd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse2_pshufl_w : GCCBuiltin<"__builtin_ia32_pshuflw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse2_pshufh_w : GCCBuiltin<"__builtin_ia32_pshufhw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>; @@ -859,7 +886,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector insert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty], + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; } @@ -869,13 +896,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty], [IntrNoMem]>; def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty], @@ -888,17 +915,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector dot product let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty], + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, Commutative]>; def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty], + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>; } // Vector sum of absolute differences let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty], + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty], [IntrNoMem, Commutative]>; } @@ -936,9 +963,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_sse42_crc32_64_8 : - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty], - [IntrNoMem]>; def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; @@ -1120,16 +1144,37 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_vperm2f128_si256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpermt_d_512: + GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpermt_q_512: + GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpermt_ps_512: + GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, + llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpermt_pd_512: + GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, + llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + } // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, - llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; @@ -1142,7 +1187,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; } // Vector compare @@ -1199,10 +1244,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvt_ps2dq_512 : GCCBuiltin<"__builtin_ia32_cvtps2dq512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtdq2_ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty], [IntrNoMem]>; } // Vector bit test @@ -1252,6 +1293,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; } // Vector extract sign mask @@ -1272,27 +1319,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vbroadcast_ss : - GCCBuiltin<"__builtin_ia32_vbroadcastss">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx_vbroadcast_sd_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx_vbroadcast_ss_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastss256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcastf128_pd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcastf128_ps_256 : GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_sd_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_ss_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastss512">, - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; } // SIMD load ops @@ -1325,6 +1357,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrReadArgMem]>; } // Conditional store ops @@ -1343,6 +1381,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_storeu_ps_512 : + GCCBuiltin<"__builtin_ia32_storeups512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_storeu_pd_512 : + GCCBuiltin<"__builtin_ia32_storeupd512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_store_ss : + GCCBuiltin<"__builtin_ia32_storess_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; } //===----------------------------------------------------------------------===// @@ -1398,6 +1448,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Vector min, max @@ -1438,6 +1494,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx512_mask_pmaxu_d_512 : GCCBuiltin<"__builtin_ia32_pmaxud512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmaxs_d_512 : GCCBuiltin<"__builtin_ia32_pmaxsd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmaxu_q_512 : GCCBuiltin<"__builtin_ia32_pmaxuq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmaxs_q_512 : GCCBuiltin<"__builtin_ia32_pmaxsq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pminu_d_512 : GCCBuiltin<"__builtin_ia32_pminud512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmins_d_512 : GCCBuiltin<"__builtin_ia32_pminsd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pminu_q_512 : GCCBuiltin<"__builtin_ia32_pminuq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmins_q_512 : GCCBuiltin<"__builtin_ia32_pminsq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Integer shift ops. @@ -1504,19 +1584,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi512_byteshift">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi512_byteshift">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_i32_ty], [IntrNoMem]>; } // Pack ops. @@ -1543,6 +1610,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pabs_d_512 : GCCBuiltin<"__builtin_ia32_pabsd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pabs_q_512 : GCCBuiltin<"__builtin_ia32_pabsq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; } // Horizontal arithmetic ops @@ -1628,22 +1701,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; - // AVX-512 - def int_x86_avx512_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty], - [IntrNoMem]>; - def int_x86_avx512_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty], - [IntrNoMem]>; - def int_x86_avx512_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty], - [IntrNoMem]>; - def int_x86_avx512_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty], - [IntrNoMem]>; - def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty], - [IntrNoMem]>; } // Vector blend @@ -1653,13 +1710,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, - llvm_i32_ty], [IntrNoMem]>; + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, - llvm_i32_ty], [IntrNoMem]>; + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, - llvm_i32_ty], [IntrNoMem]>; + llvm_i8_ty], [IntrNoMem]>; } // Vector load with broadcast @@ -1670,15 +1727,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_vbroadcast_sd_pd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_vbroadcast_sd_pd_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx2_vbroadcast_ss_ps_256 : GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_vbroadcast_ss_ps_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx2_vbroadcasti128 : Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx2_pbroadcastb_128 : @@ -1705,6 +1756,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pbroadcastq_256 : GCCBuiltin<"__builtin_ia32_pbroadcastq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_d_gpr_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_q_gpr_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pbroadcast_q_mem_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; } // Vector permutation @@ -1728,6 +1791,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vextractf32x4_512 : + GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti32x4_512 : + GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf64x4_512 : + GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti64x4_512 : + GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Conditional load ops @@ -1744,6 +1824,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">, Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_d_512 : GCCBuiltin<"__builtin_ia32_loaddqusi512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadArgMem]>; + def int_x86_avx512_mask_loadu_q_512 : GCCBuiltin<"__builtin_ia32_loaddqudi512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadArgMem]>; } // Conditional store ops @@ -1762,6 +1848,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_maskstoreq256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_storeu_d_512 : + GCCBuiltin<"__builtin_ia32_storedqusi512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_storeu_q_512 : + GCCBuiltin<"__builtin_ia32_storedqudi512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; } // Variable bit shift ops @@ -1805,68 +1899,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem]>; + [IntrReadArgMem]>; } // Misc. @@ -1878,7 +1972,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_i32_ty], [IntrNoMem, Commutative]>; + llvm_i8_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">, Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } @@ -1911,6 +2005,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -1935,6 +2039,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -1959,6 +2073,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -1983,6 +2107,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2001,6 +2135,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], @@ -2019,6 +2163,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -2591,6 +2745,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// TBM + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -2609,6 +2779,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; } +//===----------------------------------------------------------------------===// +// ADX + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_addcarryx_u32: GCCBuiltin<"__builtin_ia32_addcarryx_u32">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_x86_addcarryx_u64: GCCBuiltin<"__builtin_ia32_addcarryx_u64">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_x86_addcarry_u32: GCCBuiltin<"__builtin_ia32_addcarry_u32">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_x86_addcarry_u64: GCCBuiltin<"__builtin_ia32_addcarry_u64">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_x86_subborrow_u32: GCCBuiltin<"__builtin_ia32_subborrow_u32">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_x86_subborrow_u64: GCCBuiltin<"__builtin_ia32_subborrow_u64">, + Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, + llvm_ptr_ty], [IntrReadWriteArgMem]>; +} + //===----------------------------------------------------------------------===// // RTM intrinsics. Transactional Memory support. @@ -2622,62 +2816,187 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">, Intrinsic<[llvm_i32_ty], [], []>; } -// AVX-512 +//===----------------------------------------------------------------------===// +// AVX512 + +// Mask ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Mask instructions // 16-bit mask - def int_x86_kadd_v16i1 : GCCBuiltin<"__builtin_ia32_kaddw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], - [IntrNoMem]>; - def int_x86_kand_v16i1 : GCCBuiltin<"__builtin_ia32_kandw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], + def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_kandn_v16i1 : GCCBuiltin<"__builtin_ia32_kandnw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], + def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_knot_v16i1 : GCCBuiltin<"__builtin_ia32_knotw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty], [IntrNoMem]>; - def int_x86_kor_v16i1 : GCCBuiltin<"__builtin_ia32_korw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], + def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_kxor_v16i1 : GCCBuiltin<"__builtin_ia32_kxorw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], + def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_kxnor_v16i1 : GCCBuiltin<"__builtin_ia32_kxnorw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], + def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_mask2int_v16i1 : GCCBuiltin<"__builtin_ia32_mask2intw">, - Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty], [IntrNoMem]>; - def int_x86_int2mask_v16i1 : GCCBuiltin<"__builtin_ia32_int2maskw">, - Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_kunpck_v16i1 : GCCBuiltin<"__builtin_ia32_kunpckbw">, - Intrinsic<[llvm_v16i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty], + def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, + Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kortestz : GCCBuiltin<"__builtin_ia32_kortestz">, + def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kortestc : GCCBuiltin<"__builtin_ia32_kortestc">, + def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; } +// Conversion ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi642ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty], [IntrNoMem]>; +} + +// Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, - llvm_v16f32_ty], [IntrNoMem]>; - def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, - llvm_v8f64_ty], [IntrNoMem]>; - def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, - llvm_v16f32_ty], [IntrNoMem]>; - def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, - llvm_v8f64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvttps2udq_512: GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvttpd2dq_512: GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvttpd2udq_512: GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtps2dq_512: GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtpd2dq_512: GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtps2udq_512: GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtpd2udq_512: GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtudq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; } +// Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_vbroadcast_ss_512 : + GCCBuiltin<"__builtin_ia32_vbroadcastss512">, + Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; + def int_x86_avx512_vbroadcast_ss_ps_512 : + GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + + def int_x86_avx512_vbroadcast_sd_512 : + GCCBuiltin<"__builtin_ia32_vbroadcastsd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; + def int_x86_avx512_vbroadcast_sd_pd_512 : + GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + + def int_x86_avx512_pbroadcastd_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : + Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_pbroadcastq_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_i64_512 : + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>; +} + +// Vector sign and zero extend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx512_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; + def int_x86_avx512_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx512_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +} + +// Arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -2691,191 +3010,446 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>; - - def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_rcp14ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_avx512_rcp14pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], + def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_avx512_rcp14ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_avx512_rcp14sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], + def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], + def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14ss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_avx512_rsqrt14sd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; } +// Integer shift ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi512_byteshift">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi512_byteshift">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Gather and Scatter ops let TargetPrefix = "x86" in { - def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, - llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty, - llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - - def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty], + def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, + llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty], - [IntrReadMem]>; - - def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, - llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty, - llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty], + def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gathersiv16sf">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty, + llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], + def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8df">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, + llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem]>; - - def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty], + def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16sf">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, + llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gatherdpi512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty], + + + def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gathersiv8di">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, + llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherqpq512">, + def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gathersiv16si">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty, + llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty], + llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; - def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherqpi512">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty], + def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16si">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, + llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadArgMem]>; + // scatter - def int_x86_avx512_scatter_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpd512">, + def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdps512">, + def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scattersiv16sf">, Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpd512">, + def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8df">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqps512">, + def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16sf">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterdpd512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f64_ty, - llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scatterdps512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_v16f32_ty, - llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterqpd512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f64_ty, - llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterqps512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8f32_ty, - llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpq512">, - Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, - llvm_v8i64_ty, llvm_i32_ty], + def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scattersiv8di">, + Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, + llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterdpi512">, + def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scattersiv16si">, Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpq512">, - Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], + def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8di">, + Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_scatterqpi512">, - Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, - llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty], + def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16si">, + Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty, + llvm_i32_ty], [IntrReadWriteArgMem]>; - def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scatterdpq512">, - Intrinsic<[], [llvm_ptr_ty, - llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty], + // gather prefetch + def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">, + Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + + // scatter prefetch + def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">, + Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">, + Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; +} + +// AVX-512 conflict detection +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_conflict_d_512 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], []>; - def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scatterdpi512">, - Intrinsic<[], [llvm_ptr_ty, - llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], + def int_x86_avx512_mask_conflict_q_512 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], []>; - def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterqpq512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i32_ty], + def int_x86_avx512_mask_lzcnt_d_512 : + GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], []>; - def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterqpi512">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i32_ty, - llvm_i32_ty], + def int_x86_avx512_mask_lzcnt_q_512 : + GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], []>; } -let TargetPrefix = "x86" in { - def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_avx512_mskblendps512">, +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_blend_ps_512 : GCCBuiltin<"__builtin_ia32_blendmps_512_mask">, Intrinsic<[llvm_v16f32_ty], - [llvm_i16_ty, llvm_v16f32_ty, llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_blend_pd_512 : GCCBuiltin<"__builtin_ia32_blendmpd_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_blend_d_512 : GCCBuiltin<"__builtin_ia32_blendmd_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_blend_q_512 : GCCBuiltin<"__builtin_ia32_blendmq_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; +} + +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_avx512_cmpeqpi512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], +} + +// Compares +let TargetPrefix = "x86" in { + // 512-bit + def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_w_512 : GCCBuiltin<"__builtin_ia32_pcmpeqw512_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_and_pi : GCCBuiltin<"__builtin_ia32_avx512_andpi512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], - [IntrNoMem]>; + + def int_x86_avx512_mask_pcmpgt_b_512: GCCBuiltin<"__builtin_ia32_pcmpgtb512_mask">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_w_512: GCCBuiltin<"__builtin_ia32_pcmpgtw512_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_d_512: GCCBuiltin<"__builtin_ia32_pcmpgtd512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_q_512: GCCBuiltin<"__builtin_ia32_pcmpgtq512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cmp_b_512: GCCBuiltin<"__builtin_ia32_cmpb512_mask">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_w_512: GCCBuiltin<"__builtin_ia32_cmpw512_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_d_512: GCCBuiltin<"__builtin_ia32_cmpd512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem ]>; + def int_x86_avx512_mask_cmp_q_512: GCCBuiltin<"__builtin_ia32_cmpq512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_ucmp_b_512: GCCBuiltin<"__builtin_ia32_ucmpb512_mask">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_w_512: GCCBuiltin<"__builtin_ia32_ucmpw512_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_d_512: GCCBuiltin<"__builtin_ia32_ucmpd512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_q_512: GCCBuiltin<"__builtin_ia32_ucmpq512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + // 256-bit + def int_x86_avx512_mask_pcmpeq_b_256 : GCCBuiltin<"__builtin_ia32_pcmpeqb256_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_w_256 : GCCBuiltin<"__builtin_ia32_pcmpeqw256_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_d_256 : GCCBuiltin<"__builtin_ia32_pcmpeqd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_q_256 : GCCBuiltin<"__builtin_ia32_pcmpeqq256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pcmpgt_b_256: GCCBuiltin<"__builtin_ia32_pcmpgtb256_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_w_256: GCCBuiltin<"__builtin_ia32_pcmpgtw256_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_d_256: GCCBuiltin<"__builtin_ia32_pcmpgtd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_q_256: GCCBuiltin<"__builtin_ia32_pcmpgtq256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cmp_b_256: GCCBuiltin<"__builtin_ia32_cmpb256_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_w_256: GCCBuiltin<"__builtin_ia32_cmpw256_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_d_256: GCCBuiltin<"__builtin_ia32_cmpd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_q_256: GCCBuiltin<"__builtin_ia32_cmpq256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_ucmp_b_256: GCCBuiltin<"__builtin_ia32_ucmpb256_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_w_256: GCCBuiltin<"__builtin_ia32_ucmpw256_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_d_256: GCCBuiltin<"__builtin_ia32_ucmpd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_q_256: GCCBuiltin<"__builtin_ia32_ucmpq256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + // 128-bit + def int_x86_avx512_mask_pcmpeq_b_128 : GCCBuiltin<"__builtin_ia32_pcmpeqb128_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_w_128 : GCCBuiltin<"__builtin_ia32_pcmpeqw128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_d_128 : GCCBuiltin<"__builtin_ia32_pcmpeqd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_q_128 : GCCBuiltin<"__builtin_ia32_pcmpeqq128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pcmpgt_b_128: GCCBuiltin<"__builtin_ia32_pcmpgtb128_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_w_128: GCCBuiltin<"__builtin_ia32_pcmpgtw128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_d_128: GCCBuiltin<"__builtin_ia32_pcmpgtd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpgt_q_128: GCCBuiltin<"__builtin_ia32_pcmpgtq128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cmp_b_128: GCCBuiltin<"__builtin_ia32_cmpb128_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_w_128: GCCBuiltin<"__builtin_ia32_cmpw128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_d_128: GCCBuiltin<"__builtin_ia32_cmpd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_q_128: GCCBuiltin<"__builtin_ia32_cmpq128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_ucmp_b_128: GCCBuiltin<"__builtin_ia32_ucmpb128_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_w_128: GCCBuiltin<"__builtin_ia32_ucmpw128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_d_128: GCCBuiltin<"__builtin_ia32_ucmpd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_ucmp_q_128: GCCBuiltin<"__builtin_ia32_ucmpq128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; +} + +// Misc. +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">, + Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +//===----------------------------------------------------------------------===// +// SHA intrinsics +let TargetPrefix = "x86" in { + def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sha1msg2 : GCCBuiltin<"__builtin_ia32_sha1msg2">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sha256rnds2 : GCCBuiltin<"__builtin_ia32_sha256rnds2">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_sha256msg1 : GCCBuiltin<"__builtin_ia32_sha256msg1">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sha256msg2 : GCCBuiltin<"__builtin_ia32_sha256msg2">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; }