Added target hook for post-indexed memory ops transformation.
[oota-llvm.git] / include / llvm / IntrinsicsX86.td
index 7634fb677914f71ca6e77cc8e4bcbf4cbc20d274..22c9d294356a04840e0539abe8a0de72bc766acf 100644 (file)
@@ -313,12 +313,58 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Integer shift ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_psll_w :  // psll_{w,d,q}: no GCCBuiltin; last type v4i32.
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psll_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_w :  // psrl_{w,d,q}: no GCCBuiltin; last type v4i32.
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psrl_q :
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_w :  // psra_{w,d}: no GCCBuiltin; last type v4i32.
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_psra_d :
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+// Integer comparison ops (pcmpeq / pcmpgt on v16i8, v8i16 and v4i32).
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd128">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb128">,
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw128">,
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem]>;
+  def int_x86_sse2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd128">,
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty], [IntrNoMem]>;
 }
 
 // Conversion ops
@@ -399,7 +445,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
-  // FIXME: Temporary workaround since 2-wide shuffle is broken.
   def int_x86_sse2_movl_dq : GCCBuiltin<"__builtin_ia32_movqv4si">,
               Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
@@ -409,11 +454,56 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
               Intrinsic<[llvm_void_ty, llvm_v16i8_ty,
                          llvm_v16i8_ty, llvm_ptr_ty], [IntrWriteMem]>;
+  def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
+              Intrinsic<[llvm_void_ty, llvm_ptr_ty], [IntrWriteMem]>;
+  def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
+              Intrinsic<[llvm_void_ty], [IntrWriteMem]>;  // only type: void
+  def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
+              Intrinsic<[llvm_void_ty], [IntrWriteMem]>;  // only type: void
+}
+
+// Shuffles on two-element vectors (v2f64 / v2i64).
+// FIXME: Temporary workarounds since 2-wide shuffle is broken.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse2_movs_d : GCCBuiltin<"__builtin_ia32_movsd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_loadh_pd : GCCBuiltin<"__builtin_ia32_loadhpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_ptr_ty], [IntrReadMem]>;
+  def int_x86_sse2_loadl_pd : GCCBuiltin<"__builtin_ia32_loadlpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_ptr_ty], [IntrReadMem]>;
+  def int_x86_sse2_shuf_pd : GCCBuiltin<"__builtin_ia32_shufpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v2f64_ty, llvm_int_ty], [IntrNoMem]>;
+  def int_x86_sse2_unpckh_pd : GCCBuiltin<"__builtin_ia32_unpckhpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_unpckl_pd : GCCBuiltin<"__builtin_ia32_unpcklpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_sse2_punpckh_qdq : GCCBuiltin<"__builtin_ia32_punpckhqdq128">,
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_sse2_punpckl_qdq : GCCBuiltin<"__builtin_ia32_punpcklqdq128">,
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v2i64_ty], [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
 // SSE3
 
+// Addition / subtraction ops (addsub on v4f32 and v2f64).
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
+              Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
+                         llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
+              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
+                         llvm_v2f64_ty], [IntrNoMem]>;
+}
+
 // Horizontal ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
@@ -429,3 +519,19 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
 }
+
+// Specialized unaligned load: maps __builtin_ia32_lddqu, marked IntrReadMem.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
+              Intrinsic<[llvm_v16i8_ty, llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// Thread synchronization ops; IntrWriteMem presumably models side effects.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
+              Intrinsic<[llvm_void_ty, llvm_ptr_ty,
+                         llvm_uint_ty, llvm_uint_ty], [IntrWriteMem]>;
+  def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
+              Intrinsic<[llvm_void_ty, llvm_uint_ty,
+                         llvm_uint_ty], [IntrWriteMem]>;
+}