Misc. SSE2 intrinsics: clflush, lfence, mfence
[oota-llvm.git] / include / llvm / IntrinsicsX86.td
index 48f79d25a242ce30debc0b724cc874f085898dfd..d9ea2b850bdef078b668e0d970d9eb2db2025a52 100644 (file)
@@ -112,16 +112,11 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
               Intrinsic<[llvm_int_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
-              Intrinsic<[llvm_v2i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
               Intrinsic<[llvm_int_ty, llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_sse_cvttps2pi : GCCBuiltin<"__builtin_ia32_cvttps2pi">,
-              Intrinsic<[llvm_v2i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">,
-              Intrinsic<[llvm_v4f32_ty, llvm_int_ty], [IntrNoMem]>;
-  def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
-              Intrinsic<[llvm_v4f32_ty, llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,  // type list was [v4f32, int]; a v4f32 entry is inserted before the int
+                         llvm_int_ty], [IntrNoMem]>;
 }
 
 // SIMD load ops
@@ -255,20 +250,162 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_v2f64_ty], [IntrNoMem]>;
 }
 
+// Integer arithmetic ops. Type lists read [result, operand, operand].
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
+              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;  // v2i64 result from v4i32 operands
+  def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;  // v4i32 result from v8i16 operands
+  def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
+              Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;  // v2i64 result from v16i8 operands
+}
+
 // Integer shift ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_psll_w :  // psll_{w,d,q}: no GCCBuiltin mapping; second operand is typed v4i32
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_w :  // psrl_{w,d,q}: no GCCBuiltin mapping; second operand is typed v4i32
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_w :  // psra_{w,d}: no GCCBuiltin mapping; no q or dq variant is defined in this block
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+// Integer comparison ops; each result type equals its operand vector type.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd128">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd128">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+// Conversion ops; type lists read [result, operand(s)], all marked IntrNoMem.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
+              Intrinsic<[llvm_v4f32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
+              Intrinsic<[llvm_v4f32_ty, llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
+              Intrinsic<[llvm_int_ty, llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
+              Intrinsic<[llvm_int_ty, llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_int_ty], [IntrNoMem]>;  // two operands: a v2f64 vector and the int
+  def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
+              Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
+                         llvm_v2f64_ty], [IntrNoMem]>;  // result type matches the first operand (v4f32)
+  def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v4f32_ty], [IntrNoMem]>;  // result type matches the first operand (v2f64)
 }
 
 // SIMD load ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_loadu_pd : GCCBuiltin<"__builtin_ia32_loadupd">,
               Intrinsic<[llvm_v2f64_ty, llvm_ptr_ty], [IntrReadMem]>;
+  def int_x86_sse2_loadu_dq : GCCBuiltin<"__builtin_ia32_loaddqu">,
+              Intrinsic<[llvm_v16i8_ty, llvm_ptr_ty], [IntrReadMem]>;  // v16i8 result from a pointer operand; IntrReadMem like loadu_pd
 }
 
 // SIMD store ops
@@ -276,6 +413,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
               Intrinsic<[llvm_void_ty, llvm_ptr_ty,
                          llvm_v2f64_ty], [IntrWriteMem]>;
+  def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
+              Intrinsic<[llvm_void_ty, llvm_ptr_ty,
+                         llvm_v16i8_ty], [IntrWriteMem]>;  // void result; operands are a pointer then a v16i8 value
+  def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
+              Intrinsic<[llvm_void_ty, llvm_ptr_ty,
+                         llvm_v4i32_ty], [IntrWriteMem]>;  // void result; operands are a pointer then a v4i32 value
 }
 
 // Cacheability support ops
@@ -302,13 +445,22 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
-  def int_x86_sse2_movmskpd : GCCBuiltin<"__builtin_ia32_movmskpd">,
+  // FIXME: Temporary workaround since 2-wide shuffle is broken.
+  def int_x86_sse2_movl_dq : GCCBuiltin<"__builtin_ia32_movqv4si">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;  // one v4i32 operand, v4i32 result
+  def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,  // def renamed from int_x86_sse2_movmskpd; builtin name unchanged
               Intrinsic<[llvm_int_ty, llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
               Intrinsic<[llvm_int_ty, llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
               Intrinsic<[llvm_void_ty, llvm_v16i8_ty,
                          llvm_v16i8_ty, llvm_ptr_ty], [IntrWriteMem]>;
+  def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
+              Intrinsic<[llvm_void_ty, llvm_ptr_ty], [IntrWriteMem]>;  // void result, single pointer operand
+  def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
+              Intrinsic<[llvm_void_ty], [IntrWriteMem]>;  // no operands. NOTE(review): IntrWriteMem presumably keeps the call from being treated as side-effect free - confirm
+  def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
+              Intrinsic<[llvm_void_ty], [IntrWriteMem]>;  // no operands. NOTE(review): IntrWriteMem presumably keeps the call from being treated as side-effect free - confirm
 }
 
 //===----------------------------------------------------------------------===//