[X86][AVX512] extend support in Scalar conversion
author Asaf Badouh <asaf.badouh@intel.com>
Sun, 20 Sep 2015 14:31:19 +0000 (14:31 +0000)
committer Asaf Badouh <asaf.badouh@intel.com>
Sun, 20 Sep 2015 14:31:19 +0000 (14:31 +0000)
Add scalar FP-to-integer conversion-with-truncation intrinsics.
Add scalar conversion intrinsics between FP32 and FP64.
Add rounding-mode and SAE-mode encodings for these intrinsics.

Differential Revision: http://reviews.llvm.org/D12665

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248117 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512-intrinsics.ll
test/MC/X86/avx512-encodings.s
test/MC/X86/x86-64-avx512dq.s
test/MC/X86/x86-64-avx512dq_vl.s
test/MC/X86/x86-64-avx512f_vl.s

index 4162abd0d0c7386fb5a65fc738670f3f60fdc9b8..7188e6a0fe2522ce45648643cf7318341645f133 100644 (file)
@@ -3855,10 +3855,14 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">,
               Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -3870,10 +3874,14 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">,
               Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -4222,6 +4230,18 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v8f64_ty, llvm_v8f32_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
+  def int_x86_avx512_mask_cvtsd2ss_round : 
+        GCCBuiltin<"__builtin_ia32_cvtsd2ss_round">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_cvtss2sd_round : 
+        GCCBuiltin<"__builtin_ia32_cvtss2sd_round">,
+          Intrinsic<[llvm_v2f64_ty],
+          [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrNoMem]>;
+
   def int_x86_avx512_mask_cvtpd2ps : 
         GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
           Intrinsic<[llvm_v4f32_ty],
index d0985f112b8394e502ed657112dd152f841140c8..ccbe1f4191243b22d5b91e3529eeb71088a38695 100644 (file)
@@ -145,6 +145,8 @@ def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
 
 // We map scalar types to the smallest (128-bit) vector type
 // with the appropriate element type. This allows to use the same masking logic.
+def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
+def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
 def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
 def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
 
@@ -4598,50 +4600,55 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
 //===----------------------------------------------------------------------===//
 // AVX-512  Scalar convert from float/double to integer
 //===----------------------------------------------------------------------===//
-multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                          Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
-                          string asm> {
-let hasSideEffects = 0 in {
-  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
-              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
-              Requires<[HasAVX512]>;
-  let mayLoad = 1 in
-  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
-              !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
-              Requires<[HasAVX512]>;
-} // hasSideEffects = 0
+multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC, 
+                                  RegisterClass DstRC, Intrinsic Int,
+                           Operand memop, ComplexPattern mem_cpat, string asm> {
+  let hasSideEffects = 0, Predicates = [HasAVX512] in {
+    def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+                [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+    def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
+                !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>, 
+                EVEX, VEX_LIG, EVEX_B, EVEX_RC;
+    let mayLoad = 1 in
+    def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+                !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+  } // hasSideEffects = 0, Predicates = [HasAVX512] 
 }
-let Predicates = [HasAVX512] in {
+
 // Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
                                    ssmem, sse_load_f32, "cvtss2si">,
                                    XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, 
+                                  int_x86_sse_cvtss2si64,
                                    ssmem, sse_load_f32, "cvtss2si">,
                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, 
+                                  int_x86_avx512_cvtss2usi,
                                    ssmem, sse_load_f32, "cvtss2usi">,
                                    XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                    int_x86_avx512_cvtss2usi64, ssmem,
                                    sse_load_f32, "cvtss2usi">, XS, VEX_W,
                                    EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
                                    sdmem, sse_load_f64, "cvtsd2si">,
                                    XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, 
+                                   int_x86_sse2_cvtsd2si64,
                                    sdmem, sse_load_f64, "cvtsd2si">,
                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, VR128X, GR32, 
+                                   int_x86_avx512_cvtsd2usi,
                                    sdmem, sse_load_f64, "cvtsd2usi">,
                                    XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
                                    int_x86_avx512_cvtsd2usi64, sdmem,
                                    sse_load_f64, "cvtsd2usi">, XD, VEX_W,
                                    EVEX_CD8<64, CD8VT1>;
 
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
   defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
             int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
             SSE_CVT_Scalar, 0>, XS, EVEX_4V;
@@ -4658,121 +4665,170 @@ let isCodeGenOnly = 1 in {
   defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
             int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
             SSE_CVT_Scalar, 0>, XD, EVEX_4V;
-} // isCodeGenOnly = 1
+} // isCodeGenOnly = 1, Predicates = [HasAVX512]
 
 // Convert float/double to signed/unsigned int 32/64 with truncation
-let isCodeGenOnly = 1 in {
-  defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
-                                     ssmem, sse_load_f32, "cvttss2si">,
-                                     XS, EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
-                                     int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
-                                     "cvttss2si">, XS, VEX_W,
-                                     EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
-                                     sdmem, sse_load_f64, "cvttsd2si">, XD,
-                                     EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
-                                     int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
-                                     "cvttsd2si">, XD, VEX_W,
-                                     EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
-                                     int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
-                                     "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
-                                     int_x86_avx512_cvttss2usi64, ssmem,
-                                     sse_load_f32, "cvttss2usi">, XS, VEX_W,
-                                     EVEX_CD8<32, CD8VT1>;
-  defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
-                                     int_x86_avx512_cvttsd2usi,
-                                     sdmem, sse_load_f64, "cvttsd2usi">, XD,
-                                     EVEX_CD8<64, CD8VT1>;
-  defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
-                                     int_x86_avx512_cvttsd2usi64, sdmem,
-                                     sse_load_f64, "cvttsd2usi">, XD, VEX_W,
-                                     EVEX_CD8<64, CD8VT1>;
-} // isCodeGenOnly = 1
-
-multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                         string asm> {
-  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 
+                            X86VectorVTInfo _DstRC, SDNode OpNode, 
+                            SDNode OpNodeRnd>{
+let Predicates = [HasAVX512] in {
+  def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
-  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
+  def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+                !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+                []>, EVEX, EVEX_B;
+  def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
-}
-
-defm VCVTTSS2SIZ    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
-                                  loadf32, "cvttss2si">, XS,
-                                  EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USIZ   : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
-                                  loadf32, "cvttss2usi">, XS,
-                                  EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64Z  : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
-                                  loadf32, "cvttss2si">, XS, VEX_W,
-                                  EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
-                                  loadf32, "cvttss2usi">, XS, VEX_W,
-                                  EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIZ    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
-                                  loadf64, "cvttsd2si">, XD,
-                                  EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USIZ   : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
-                                  loadf64, "cvttsd2usi">, XD,
-                                  EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64Z  : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
-                                  loadf64, "cvttsd2si">, XD, VEX_W,
-                                  EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
-                                  loadf64, "cvttsd2usi">, XD, VEX_W,
-                                  EVEX_CD8<64, CD8VT1>;
+              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, 
+              EVEX;
+
+  let isCodeGenOnly = 1,hasSideEffects = 0 in {
+      def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+               [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+                                     (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
+      def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+                !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+                [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src, 
+                                      (i32 FROUND_NO_EXC)))]>, 
+                                      EVEX,VEX_LIG , EVEX_B;
+      let mayLoad = 1 in
+        def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 
+                    (ins _SrcRC.MemOp:$src),
+                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+                    []>, EVEX, VEX_LIG;
+
+  } // isCodeGenOnly = 1, hasSideEffects = 0
+} //HasAVX512
+}
+
+
+defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info, 
+                        fp_to_sint,X86cvttss2IntRnd>, 
+                        XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info, 
+                        fp_to_sint,X86cvttss2IntRnd>, 
+                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info, 
+                        fp_to_sint,X86cvttsd2IntRnd>,
+                        XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info, 
+                        fp_to_sint,X86cvttsd2IntRnd>, 
+                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info, 
+                        fp_to_uint,X86cvttss2UIntRnd>, 
+                        XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info, 
+                        fp_to_uint,X86cvttss2UIntRnd>, 
+                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info, 
+                        fp_to_uint,X86cvttsd2UIntRnd>, 
+                        XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info, 
+                        fp_to_uint,X86cvttsd2UIntRnd>, 
+                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+let Predicates = [HasAVX512] in {
+  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+            (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+            (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+            (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+            (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+
 } // HasAVX512
 //===----------------------------------------------------------------------===//
 // AVX-512  Convert form float to double and back
 //===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
-                    (ins FR32X:$src1, FR32X:$src2),
-                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
-                    (ins FR32X:$src1, f32mem:$src2),
-                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
-                    EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr  : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
-                      (ins FR64X:$src1, FR64X:$src2),
-                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm  : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
-                      (ins FR64X:$src1, f64mem:$src2),
-                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      []>, EVEX_4V, VEX_LIG, VEX_W,
-                      Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
-}
-
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
-      Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
-    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
-
-def : Pat<(extloadf32 addr:$src),
-    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                         X86VectorVTInfo _Src, SDNode OpNode> {
+  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                         (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 
+                         "$src2, $src1", "$src1, $src2",
+                         (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+                                       (_Src.VT _Src.RC:$src2)))>, 
+                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 
+                         "$src2, $src1", "$src1, $src2",
+                         (_.VT (OpNode (_Src.VT _Src.RC:$src1), 
+                                  (_Src.VT (scalar_to_vector 
+                                            (_Src.ScalarLdFrag addr:$src2)))))>, 
+                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+// Scalar Conversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                         X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1), 
+                                         (_Src.VT _Src.RC:$src2),
+                                         (i32 FROUND_NO_EXC)))>,
+                        EVEX_4V, VEX_LIG, EVEX_B;
+}
+
+// Scalar conversion with rounding control (RC); reg-reg only, no folded load.
+multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                         X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
+                        "$rc, $src2, $src1", "$src1, $src2, $rc",
+                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+                        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>, // same class as rr
+                        EVEX_B, EVEX_RC;
+}
+multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode, 
+                                  SDNode OpNodeRnd, X86VectorVTInfo _src, 
+                                                        X86VectorVTInfo _dst> {
+  let Predicates = [HasAVX512] in {
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
+                               OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
+                               EVEX_V512, XD;
+  }
+}
+
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode, 
+                                    SDNode OpNodeRnd, X86VectorVTInfo _src, 
+                                                          X86VectorVTInfo _dst> {
+  let Predicates = [HasAVX512] in {
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>, 
+             EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
+  }
+}
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround,
+                                         X86froundRnd, f64x_info, f32x_info>;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, 
+                                          X86fpextRnd,f32x_info, f64x_info >;
+
+def : Pat<(f64 (fextend FR32X:$src)), 
+          (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X), 
+                               (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+          Requires<[HasAVX512]>;
+def : Pat<(f64 (fextend (loadf32 addr:$src))),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          Requires<[HasAVX512]>;
+
+def : Pat<(f64 (extloadf32 addr:$src)),
+      (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
       Requires<[HasAVX512, OptForSize]>;
 
-def : Pat<(extloadf32 addr:$src),
-    (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
-    Requires<[HasAVX512, OptForSpeed]>;
+def : Pat<(f64 (extloadf32 addr:$src)),
+          (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)), 
+                    (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+          Requires<[HasAVX512, OptForSpeed]>;
 
-def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+def : Pat<(f32 (fround FR64X:$src)), 
+          (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X), 
+                    (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
            Requires<[HasAVX512]>;
-
 //===----------------------------------------------------------------------===//
 // AVX-512  Vector convert from signed/unsigned integer to float/double
 //          and from float/double to signed/unsigned integer
index 3853f7405fa3e4481838e99dbd413b2ff6c78303..e0e2864c583dee79264ad013607ada3fc579b637 100644 (file)
@@ -137,6 +137,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
                                              SDTCisFP<0>, SDTCisFP<1>,
                                              SDTCisOpSmallerThanOp<0, 1>]>>;
 
+def X86fround: SDNode<"X86ISD::VFPROUND",
+                        SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+                                             SDTCVecEltisVT<0, f32>,
+                                             SDTCVecEltisVT<1, f64>,
+                                             SDTCVecEltisVT<2, f64>,
+                                             SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86froundRnd: SDNode<"X86ISD::VFPROUND",
+                        SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+                                             SDTCVecEltisVT<0, f32>,
+                                             SDTCVecEltisVT<1, f64>,
+                                             SDTCVecEltisVT<2, f64>,
+                                             SDTCisOpSmallerThanOp<0, 1>,
+                                             SDTCisInt<3>]>>;
+
+def X86fpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+                                             SDTCVecEltisVT<0, f64>,
+                                             SDTCVecEltisVT<1, f32>,
+                                             SDTCVecEltisVT<2, f32>,
+                                             SDTCisOpSmallerThanOp<1, 0>]>>;
+
+def X86fpextRnd  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+                                             SDTCVecEltisVT<0, f64>,
+                                             SDTCVecEltisVT<1, f32>,
+                                             SDTCVecEltisVT<2, f32>,
+                                             SDTCisOpSmallerThanOp<1, 0>,
+                                             SDTCisInt<3>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
@@ -390,9 +419,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
 
 def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>, 
+                                             SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>;
 def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
-
+def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
+                                            SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>;
 def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                            SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
                                            SDTCisInt<2>]>;
@@ -411,6 +443,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
 def X86SintToFpRnd  : SDNode<"X86ISD::SINT_TO_FP_RND",  SDTintToFPRound>;
 def X86UintToFpRnd  : SDNode<"X86ISD::UINT_TO_FP_RND",  SDTintToFPRound>;
 
+def X86cvttss2IntRnd      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTSFloatToIntRnd>;
+def X86cvttss2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTSFloatToIntRnd>;
+def X86cvttsd2IntRnd      : SDNode<"X86ISD::FP_TO_SINT_RND",  SDTSDoubleToIntRnd>;
+def X86cvttsd2UIntRnd     : SDNode<"X86ISD::FP_TO_UINT_RND",  SDTSDoubleToIntRnd>;
 // Vector with rounding mode
 
 // cvtt fp-to-int staff
index 818e29bd460e4d45f6b2defa7320318585ff80ad..75941ea493887d4125466ba2b4b1d78fe2533889 100644 (file)
@@ -318,6 +318,14 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_cvtsi2sd64,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss32,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss64,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi2ss,   INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
@@ -551,6 +559,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      ISD::SINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
                      ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+  X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::VFPROUND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, 
+                     X86ISD::VFPEXT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
                      ISD::FP_TO_SINT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
index 3ee98e6e84475577b0a9e2901277597ac3e671f3..18676e4c0a68021fc8b64ad713278938e4a0deea 100644 (file)
@@ -216,12 +216,47 @@ define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
 }
 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
 
-define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
+define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
   ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
-  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
-  ret i64 %res
+  ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
+  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
+  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
+  %res2 = add i64 %res0, %res1
+  ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
+  ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
+  ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
+  %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
+  %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
+  %res2 = add i32 %res0, %res1
+  ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
+  ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
+  ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
+  %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
+  %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
+  %res2 = add i32 %res0, %res1
+  ret i32 %res2
 }
-declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
+
+
+
+define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
+  ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
+  ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
+  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
+  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
+  %res2 = add i64 %res0, %res1
+  ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
 
 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
   ; CHECK: vcvtss2si {{.*}}encoding: [0x62
@@ -239,12 +274,45 @@ define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
 
 
-define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
+define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
+  ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
   ; CHECK: vcvttss2si {{.*}}encoding: [0x62
-  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
-  ret i64 %res
+  %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
+  %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
+  %res2 = add i32 %res0, %res1
+  ret i32 %res2
 }
-declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
+  ; CHECK: vcvttss2si {{.*}}encoding: [0x62
+  ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
+  %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
+  %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
+  %res2 = add i64 %res0, %res1
+  ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
+  ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
+  ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
+  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
+  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
+  %res2 = add i32 %res0, %res1
+  ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
+  ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
+  ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
+  %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
+  %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
+  %res2 = add i64 %res0, %res1
+  ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
 
 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
   ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
@@ -4272,3 +4340,32 @@ define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i6
   ret <8 x i64> %res4
 }
 
+declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
+; CHECK:    kmovw %edi, %k1
+; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4) ; mask taken from %x3; checked above as the {%k1} form without {sae}
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8) ; all-ones mask; checked above as the unmasked {sae} form
+  %res2 = fadd <2 x double> %res, %res1 ; sums both results so neither call is dead; checked above as vaddpd
+  ret <2 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
+; CHECK:    kmovw %edi, %k1
+; CHECK-NEXT:    vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
index 875e81d881e24848232d11a3437dccc39a41b87d..1505c537af6b1d95480e153427b3bc60d9a13aa4 100644 (file)
@@ -15838,3 +15838,1314 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xa2,0xfd,0x41,0xa2,0x94,0x81,0x00,0x04,0x00,0x00]
           vscatterdpd %zmm18, 1024(%rcx, %ymm24,4) {%k1}
 
+// CHECK: vcvtpd2dq %zmm15, %ymm24
+// CHECK:  encoding: [0x62,0x41,0xff,0x48,0xe6,0xc7]
+          vcvtpd2dq %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq %zmm15, %ymm24 {%k3}
+// CHECK:  encoding: [0x62,0x41,0xff,0x4b,0xe6,0xc7]
+          vcvtpd2dq %zmm15, %ymm24 {%k3}
+
+// CHECK: vcvtpd2dq %zmm15, %ymm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x41,0xff,0xcb,0xe6,0xc7]
+          vcvtpd2dq %zmm15, %ymm24 {%k3} {z}
+
+// CHECK: vcvtpd2dq {rn-sae}, %zmm15, %ymm24
+// CHECK:  encoding: [0x62,0x41,0xff,0x18,0xe6,0xc7]
+          vcvtpd2dq {rn-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {ru-sae}, %zmm15, %ymm24
+// CHECK:  encoding: [0x62,0x41,0xff,0x58,0xe6,0xc7]
+          vcvtpd2dq {ru-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {rd-sae}, %zmm15, %ymm24
+// CHECK:  encoding: [0x62,0x41,0xff,0x38,0xe6,0xc7]
+          vcvtpd2dq {rd-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {rz-sae}, %zmm15, %ymm24
+// CHECK:  encoding: [0x62,0x41,0xff,0x78,0xe6,0xc7]
+          vcvtpd2dq {rz-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq (%rcx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0xe6,0x01]
+          vcvtpd2dq (%rcx), %ymm24
+
+// CHECK: vcvtpd2dq 291(%rax,%r14,8), %ymm24
+// CHECK:  encoding: [0x62,0x21,0xff,0x48,0xe6,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2dq 291(%rax,%r14,8), %ymm24
+
+// CHECK: vcvtpd2dq (%rcx){1to8}, %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0xe6,0x01]
+          vcvtpd2dq (%rcx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq 8128(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0xe6,0x42,0x7f]
+          vcvtpd2dq 8128(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq 8192(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0xe6,0x82,0x00,0x20,0x00,0x00]
+          vcvtpd2dq 8192(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq -8192(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0xe6,0x42,0x80]
+          vcvtpd2dq -8192(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq -8256(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0xe6,0x82,0xc0,0xdf,0xff,0xff]
+          vcvtpd2dq -8256(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to8}, %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0xe6,0x42,0x7f]
+          vcvtpd2dq 1016(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to8}, %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0xe6,0x82,0x00,0x04,0x00,0x00]
+          vcvtpd2dq 1024(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to8}, %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0xe6,0x42,0x80]
+          vcvtpd2dq -1024(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to8}, %ymm24
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0xe6,0x82,0xf8,0xfb,0xff,0xff]
+          vcvtpd2dq -1032(%rdx){1to8}, %ymm24
+          
+// CHECK: vcvtpd2udq %zmm19, %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x48,0x79,0xfb]
+          vcvtpd2udq %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq %zmm19, %ymm15 {%k7}
+// CHECK:  encoding: [0x62,0x31,0xfc,0x4f,0x79,0xfb]
+          vcvtpd2udq %zmm19, %ymm15 {%k7}
+
+// CHECK: vcvtpd2udq %zmm19, %ymm15 {%k7} {z}
+// CHECK:  encoding: [0x62,0x31,0xfc,0xcf,0x79,0xfb]
+          vcvtpd2udq %zmm19, %ymm15 {%k7} {z}
+
+// CHECK: vcvtpd2udq {rn-sae}, %zmm19, %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x18,0x79,0xfb]
+          vcvtpd2udq {rn-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {ru-sae}, %zmm19, %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x58,0x79,0xfb]
+          vcvtpd2udq {ru-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {rd-sae}, %zmm19, %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x38,0x79,0xfb]
+          vcvtpd2udq {rd-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {rz-sae}, %zmm19, %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x78,0x79,0xfb]
+          vcvtpd2udq {rz-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq (%rcx), %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x48,0x79,0x39]
+          vcvtpd2udq (%rcx), %ymm15
+
+// CHECK: vcvtpd2udq 291(%rax,%r14,8), %ymm15
+// CHECK:  encoding: [0x62,0x31,0xfc,0x48,0x79,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2udq 291(%rax,%r14,8), %ymm15
+
+// CHECK: vcvtpd2udq (%rcx){1to8}, %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x58,0x79,0x39]
+          vcvtpd2udq (%rcx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq 8128(%rdx), %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x48,0x79,0x7a,0x7f]
+          vcvtpd2udq 8128(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq 8192(%rdx), %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x48,0x79,0xba,0x00,0x20,0x00,0x00]
+          vcvtpd2udq 8192(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq -8192(%rdx), %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x48,0x79,0x7a,0x80]
+          vcvtpd2udq -8192(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq -8256(%rdx), %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x48,0x79,0xba,0xc0,0xdf,0xff,0xff]
+          vcvtpd2udq -8256(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq 1016(%rdx){1to8}, %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x58,0x79,0x7a,0x7f]
+          vcvtpd2udq 1016(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq 1024(%rdx){1to8}, %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x58,0x79,0xba,0x00,0x04,0x00,0x00]
+          vcvtpd2udq 1024(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq -1024(%rdx){1to8}, %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x58,0x79,0x7a,0x80]
+          vcvtpd2udq -1024(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq -1032(%rdx){1to8}, %ymm15
+// CHECK:  encoding: [0x62,0x71,0xfc,0x58,0x79,0xba,0xf8,0xfb,0xff,0xff]
+          vcvtpd2udq -1032(%rdx){1to8}, %ymm15
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x48,0x78,0xc4]
+          vcvttpd2udq %zmm20, %ymm16
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16 {%k7}
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x4f,0x78,0xc4]
+          vcvttpd2udq %zmm20, %ymm16 {%k7}
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfc,0xcf,0x78,0xc4]
+          vcvttpd2udq %zmm20, %ymm16 {%k7} {z}
+
+// CHECK: vcvttpd2udq {sae}, %zmm20, %ymm16
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x18,0x78,0xc4]
+          vcvttpd2udq {sae}, %zmm20, %ymm16
+
+// CHECK: vcvttpd2udq (%rcx), %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x48,0x78,0x01]
+          vcvttpd2udq (%rcx), %ymm16
+
+// CHECK: vcvttpd2udq 291(%rax,%r14,8), %ymm16
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x48,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttpd2udq 291(%rax,%r14,8), %ymm16
+
+// CHECK: vcvttpd2udq (%rcx){1to8}, %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x78,0x01]
+          vcvttpd2udq (%rcx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq 8128(%rdx), %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x48,0x78,0x42,0x7f]
+          vcvttpd2udq 8128(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq 8192(%rdx), %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x48,0x78,0x82,0x00,0x20,0x00,0x00]
+          vcvttpd2udq 8192(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq -8192(%rdx), %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x48,0x78,0x42,0x80]
+          vcvttpd2udq -8192(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq -8256(%rdx), %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x48,0x78,0x82,0xc0,0xdf,0xff,0xff]
+          vcvttpd2udq -8256(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to8}, %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x78,0x42,0x7f]
+          vcvttpd2udq 1016(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to8}, %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x78,0x82,0x00,0x04,0x00,0x00]
+          vcvttpd2udq 1024(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to8}, %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x78,0x42,0x80]
+          vcvttpd2udq -1024(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to8}, %ymm16
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x78,0x82,0xf8,0xfb,0xff,0xff]
+          vcvttpd2udq -1032(%rdx){1to8}, %ymm16
+          
+// CHECK: vcvttpd2dq %zmm9, %ymm27
+// CHECK:  encoding: [0x62,0x41,0xfd,0x48,0xe6,0xd9]
+          vcvttpd2dq %zmm9, %ymm27
+
+// CHECK: vcvttpd2dq %zmm9, %ymm27 {%k5}
+// CHECK:  encoding: [0x62,0x41,0xfd,0x4d,0xe6,0xd9]
+          vcvttpd2dq %zmm9, %ymm27 {%k5}
+
+// CHECK: vcvttpd2dq %zmm9, %ymm27 {%k5} {z}
+// CHECK:  encoding: [0x62,0x41,0xfd,0xcd,0xe6,0xd9]
+          vcvttpd2dq %zmm9, %ymm27 {%k5} {z}
+
+// CHECK: vcvttpd2dq {sae}, %zmm9, %ymm27
+// CHECK:  encoding: [0x62,0x41,0xfd,0x18,0xe6,0xd9]
+          vcvttpd2dq {sae}, %zmm9, %ymm27
+
+// CHECK: vcvttpd2dq (%rcx), %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0xe6,0x19]
+          vcvttpd2dq (%rcx), %ymm27
+
+// CHECK: vcvttpd2dq 291(%rax,%r14,8), %ymm27
+// CHECK:  encoding: [0x62,0x21,0xfd,0x48,0xe6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vcvttpd2dq 291(%rax,%r14,8), %ymm27
+
+// CHECK: vcvttpd2dq (%rcx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x58,0xe6,0x19]
+          vcvttpd2dq (%rcx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq 8128(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0xe6,0x5a,0x7f]
+          vcvttpd2dq 8128(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq 8192(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0xe6,0x9a,0x00,0x20,0x00,0x00]
+          vcvttpd2dq 8192(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq -8192(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0xe6,0x5a,0x80]
+          vcvttpd2dq -8192(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq -8256(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0xe6,0x9a,0xc0,0xdf,0xff,0xff]
+          vcvttpd2dq -8256(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq 1016(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x58,0xe6,0x5a,0x7f]
+          vcvttpd2dq 1016(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq 1024(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x58,0xe6,0x9a,0x00,0x04,0x00,0x00]
+          vcvttpd2dq 1024(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq -1024(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x58,0xe6,0x5a,0x80]
+          vcvttpd2dq -1024(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq -1032(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x58,0xe6,0x9a,0xf8,0xfb,0xff,0xff]
+          vcvttpd2dq -1032(%rdx){1to8}, %ymm27
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x48,0x5a,0xcc]
+          vcvtsd2ss %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6}
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x4e,0x5a,0xcc]
+          vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6}
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0xc1,0xb7,0xce,0x5a,0xcc]
+          vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6} {z}
+
+// CHECK: vcvtsd2ss {rn-sae}, %xmm12, %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x18,0x5a,0xcc]
+          vcvtsd2ss {rn-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {ru-sae}, %xmm12, %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x58,0x5a,0xcc]
+          vcvtsd2ss {ru-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {rd-sae}, %xmm12, %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x38,0x5a,0xcc]
+          vcvtsd2ss {rd-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {rz-sae}, %xmm12, %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xc1,0xb7,0x78,0x5a,0xcc]
+          vcvtsd2ss {rz-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss (%rcx), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x09]
+          vcvtsd2ss (%rcx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 291(%rax,%r14,8), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0xb7,0x48,0x5a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2ss 291(%rax,%r14,8), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 1016(%rdx), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x4a,0x7f]
+          vcvtsd2ss 1016(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 1024(%rdx), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x8a,0x00,0x04,0x00,0x00]
+          vcvtsd2ss 1024(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss -1024(%rdx), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x4a,0x80]
+          vcvtsd2ss -1024(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss -1032(%rdx), %xmm9, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x8a,0xf8,0xfb,0xff,0xff]
+          vcvtsd2ss -1032(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0xe6]
+          vcvtss2sd %xmm6, %xmm6, %xmm28
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3}
+// CHECK:  encoding: [0x62,0x61,0x4e,0x4b,0x5a,0xe6]
+          vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3}
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x61,0x4e,0xcb,0x5a,0xe6]
+          vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3} {z}
+
+// CHECK: vcvtss2sd {sae}, %xmm6, %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x18,0x5a,0xe6]
+          vcvtss2sd {sae}, %xmm6, %xmm6, %xmm28
+
+// CHECK: vcvtss2sd (%rcx), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0x21]
+          vcvtss2sd (%rcx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 291(%rax,%r14,8), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x21,0x4e,0x48,0x5a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2sd 291(%rax,%r14,8), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 508(%rdx), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0x62,0x7f]
+          vcvtss2sd 508(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 512(%rdx), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0xa2,0x00,0x02,0x00,0x00]
+          vcvtss2sd 512(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd -512(%rdx), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0x62,0x80]
+          vcvtss2sd -512(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd -516(%rdx), %xmm6, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x4e,0x48,0x5a,0xa2,0xfc,0xfd,0xff,0xff]
+          vcvtss2sd -516(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x18,0x2d,0xc7]
+          vcvtsd2si {rn-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x58,0x2d,0xc7]
+          vcvtsd2si {ru-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x38,0x2d,0xc7]
+          vcvtsd2si {rd-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x78,0x2d,0xc7]
+          vcvtsd2si {rz-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x18,0x2d,0xef]
+          vcvtsd2si {rn-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x58,0x2d,0xef]
+          vcvtsd2si {ru-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x38,0x2d,0xef]
+          vcvtsd2si {rd-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x78,0x2d,0xef]
+          vcvtsd2si {rz-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x18,0x2d,0xef]
+          vcvtsd2si {rn-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x58,0x2d,0xef]
+          vcvtsd2si {ru-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x38,0x2d,0xef]
+          vcvtsd2si {rd-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x78,0x2d,0xef]
+          vcvtsd2si {rz-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm10, %rax
+// CHECK:  encoding: [0x62,0xd1,0xff,0x18,0x2d,0xc2]
+          vcvtsd2si {rn-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm10, %rax
+// CHECK:  encoding: [0x62,0xd1,0xff,0x58,0x2d,0xc2]
+          vcvtsd2si {ru-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm10, %rax
+// CHECK:  encoding: [0x62,0xd1,0xff,0x38,0x2d,0xc2]
+          vcvtsd2si {rd-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm10, %rax
+// CHECK:  encoding: [0x62,0xd1,0xff,0x78,0x2d,0xc2]
+          vcvtsd2si {rz-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm10, %r8
+// CHECK:  encoding: [0x62,0x51,0xff,0x18,0x2d,0xc2]
+          vcvtsd2si {rn-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm10, %r8
+// CHECK:  encoding: [0x62,0x51,0xff,0x58,0x2d,0xc2]
+          vcvtsd2si {ru-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm10, %r8
+// CHECK:  encoding: [0x62,0x51,0xff,0x38,0x2d,0xc2]
+          vcvtsd2si {rd-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm10, %r8
+// CHECK:  encoding: [0x62,0x51,0xff,0x78,0x2d,0xc2]
+          vcvtsd2si {rz-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2usi %xmm30, %eax
+// CHECK:  encoding: [0x62,0x91,0x7f,0x08,0x79,0xc6]
+          vcvtsd2usi %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %eax
+// CHECK:  encoding: [0x62,0x91,0x7f,0x18,0x79,0xc6]
+          vcvtsd2usi {rn-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %eax
+// CHECK:  encoding: [0x62,0x91,0x7f,0x58,0x79,0xc6]
+          vcvtsd2usi {ru-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %eax
+// CHECK:  encoding: [0x62,0x91,0x7f,0x38,0x79,0xc6]
+          vcvtsd2usi {rd-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %eax
+// CHECK:  encoding: [0x62,0x91,0x7f,0x78,0x79,0xc6]
+          vcvtsd2usi {rz-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi (%rcx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x01]
+          vcvtsd2usi (%rcx), %eax
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %eax
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvtsd2usi 1016(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x42,0x7f]
+          vcvtsd2usi 1016(%rdx), %eax
+
+// CHECK: vcvtsd2usi 1024(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+          vcvtsd2usi 1024(%rdx), %eax
+
+// CHECK: vcvtsd2usi -1024(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x42,0x80]
+          vcvtsd2usi -1024(%rdx), %eax
+
+// CHECK: vcvtsd2usi -1032(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+          vcvtsd2usi -1032(%rdx), %eax
+
+// CHECK: vcvtsd2usi %xmm30, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7f,0x08,0x79,0xee]
+          vcvtsd2usi %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7f,0x18,0x79,0xee]
+          vcvtsd2usi {rn-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7f,0x58,0x79,0xee]
+          vcvtsd2usi {ru-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7f,0x38,0x79,0xee]
+          vcvtsd2usi {rd-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7f,0x78,0x79,0xee]
+          vcvtsd2usi {rz-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi (%rcx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x29]
+          vcvtsd2usi (%rcx), %ebp
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvtsd2usi 1016(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x6a,0x7f]
+          vcvtsd2usi 1016(%rdx), %ebp
+
+// CHECK: vcvtsd2usi 1024(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0xaa,0x00,0x04,0x00,0x00]
+          vcvtsd2usi 1024(%rdx), %ebp
+
+// CHECK: vcvtsd2usi -1024(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0x6a,0x80]
+          vcvtsd2usi -1024(%rdx), %ebp
+
+// CHECK: vcvtsd2usi -1032(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x79,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvtsd2usi -1032(%rdx), %ebp
+
+// CHECK: vcvtsd2usi %xmm30, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7f,0x08,0x79,0xee]
+          vcvtsd2usi %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7f,0x18,0x79,0xee]
+          vcvtsd2usi {rn-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7f,0x58,0x79,0xee]
+          vcvtsd2usi {ru-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7f,0x38,0x79,0xee]
+          vcvtsd2usi {rd-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7f,0x78,0x79,0xee]
+          vcvtsd2usi {rz-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi (%rcx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x79,0x29]
+          vcvtsd2usi (%rcx), %r13d
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %r13d
+// CHECK:  encoding: [0x62,0x31,0x7f,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvtsd2usi 1016(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x79,0x6a,0x7f]
+          vcvtsd2usi 1016(%rdx), %r13d
+
+// CHECK: vcvtsd2usi 1024(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x79,0xaa,0x00,0x04,0x00,0x00]
+          vcvtsd2usi 1024(%rdx), %r13d
+
+// CHECK: vcvtsd2usi -1024(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x79,0x6a,0x80]
+          vcvtsd2usi -1024(%rdx), %r13d
+
+// CHECK: vcvtsd2usi -1032(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x79,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvtsd2usi -1032(%rdx), %r13d
+
+// CHECK: vcvtsd2usi %xmm18, %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x08,0x79,0xc2]
+          vcvtsd2usi %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm18, %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x18,0x79,0xc2]
+          vcvtsd2usi {rn-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm18, %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x58,0x79,0xc2]
+          vcvtsd2usi {ru-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm18, %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x38,0x79,0xc2]
+          vcvtsd2usi {rd-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm18, %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x78,0x79,0xc2]
+          vcvtsd2usi {rz-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi (%rcx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x79,0x01]
+          vcvtsd2usi (%rcx), %rax
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvtsd2usi 1016(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x79,0x42,0x7f]
+          vcvtsd2usi 1016(%rdx), %rax
+
+// CHECK: vcvtsd2usi 1024(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+          vcvtsd2usi 1024(%rdx), %rax
+
+// CHECK: vcvtsd2usi -1024(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x79,0x42,0x80]
+          vcvtsd2usi -1024(%rdx), %rax
+
+// CHECK: vcvtsd2usi -1032(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+          vcvtsd2usi -1032(%rdx), %rax
+
+// CHECK: vcvtsd2usi %xmm18, %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x08,0x79,0xc2]
+          vcvtsd2usi %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm18, %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x18,0x79,0xc2]
+          vcvtsd2usi {rn-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm18, %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x58,0x79,0xc2]
+          vcvtsd2usi {ru-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm18, %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x38,0x79,0xc2]
+          vcvtsd2usi {rd-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm18, %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x78,0x79,0xc2]
+          vcvtsd2usi {rz-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi (%rcx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x79,0x01]
+          vcvtsd2usi (%rcx), %r8
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsd2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvtsd2usi 1016(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x79,0x42,0x7f]
+          vcvtsd2usi 1016(%rdx), %r8
+
+// CHECK: vcvtsd2usi 1024(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+          vcvtsd2usi 1024(%rdx), %r8
+
+// CHECK: vcvtsd2usi -1024(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x79,0x42,0x80]
+          vcvtsd2usi -1024(%rdx), %r8
+
+// CHECK: vcvtsd2usi -1032(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+          vcvtsd2usi -1032(%rdx), %r8
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x18,0x2d,0xc6]
+          vcvtss2si {rn-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x58,0x2d,0xc6]
+          vcvtss2si {ru-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x38,0x2d,0xc6]
+          vcvtss2si {rd-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x78,0x2d,0xc6]
+          vcvtss2si {rz-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x18,0x2d,0xee]
+          vcvtss2si {rn-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x58,0x2d,0xee]
+          vcvtss2si {ru-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x38,0x2d,0xee]
+          vcvtss2si {rd-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x78,0x2d,0xee]
+          vcvtss2si {rz-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x18,0x2d,0xee]
+          vcvtss2si {rn-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x58,0x2d,0xee]
+          vcvtss2si {ru-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x38,0x2d,0xee]
+          vcvtss2si {rd-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x78,0x2d,0xee]
+          vcvtss2si {rz-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rn-sae}, %xmm29, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x18,0x2d,0xc5]
+          vcvtss2si {rn-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {ru-sae}, %xmm29, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x58,0x2d,0xc5]
+          vcvtss2si {ru-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rd-sae}, %xmm29, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x38,0x2d,0xc5]
+          vcvtss2si {rd-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rz-sae}, %xmm29, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x78,0x2d,0xc5]
+          vcvtss2si {rz-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rn-sae}, %xmm29, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x18,0x2d,0xc5]
+          vcvtss2si {rn-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {ru-sae}, %xmm29, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x58,0x2d,0xc5]
+          vcvtss2si {ru-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {rd-sae}, %xmm29, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x38,0x2d,0xc5]
+          vcvtss2si {rd-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {rz-sae}, %xmm29, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x78,0x2d,0xc5]
+          vcvtss2si {rz-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2usi %xmm28, %eax
+// CHECK:  encoding: [0x62,0x91,0x7e,0x08,0x79,0xc4]
+          vcvtss2usi %xmm28, %eax
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %eax
+// CHECK:  encoding: [0x62,0x91,0x7e,0x18,0x79,0xc4]
+          vcvtss2usi {rn-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %eax
+// CHECK:  encoding: [0x62,0x91,0x7e,0x58,0x79,0xc4]
+          vcvtss2usi {ru-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %eax
+// CHECK:  encoding: [0x62,0x91,0x7e,0x38,0x79,0xc4]
+          vcvtss2usi {rd-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %eax
+// CHECK:  encoding: [0x62,0x91,0x7e,0x78,0x79,0xc4]
+          vcvtss2usi {rz-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi (%rcx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x01]
+          vcvtss2usi (%rcx), %eax
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvtss2usi 508(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x42,0x7f]
+          vcvtss2usi 508(%rdx), %eax
+
+// CHECK: vcvtss2usi 512(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+          vcvtss2usi 512(%rdx), %eax
+
+// CHECK: vcvtss2usi -512(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x42,0x80]
+          vcvtss2usi -512(%rdx), %eax
+
+// CHECK: vcvtss2usi -516(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+          vcvtss2usi -516(%rdx), %eax
+
+// CHECK: vcvtss2usi %xmm28, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7e,0x08,0x79,0xec]
+          vcvtss2usi %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7e,0x18,0x79,0xec]
+          vcvtss2usi {rn-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7e,0x58,0x79,0xec]
+          vcvtss2usi {ru-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7e,0x38,0x79,0xec]
+          vcvtss2usi {rd-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %ebp
+// CHECK:  encoding: [0x62,0x91,0x7e,0x78,0x79,0xec]
+          vcvtss2usi {rz-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi (%rcx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x29]
+          vcvtss2usi (%rcx), %ebp
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvtss2usi 508(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x6a,0x7f]
+          vcvtss2usi 508(%rdx), %ebp
+
+// CHECK: vcvtss2usi 512(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0xaa,0x00,0x02,0x00,0x00]
+          vcvtss2usi 512(%rdx), %ebp
+
+// CHECK: vcvtss2usi -512(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0x6a,0x80]
+          vcvtss2usi -512(%rdx), %ebp
+
+// CHECK: vcvtss2usi -516(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x79,0xaa,0xfc,0xfd,0xff,0xff]
+          vcvtss2usi -516(%rdx), %ebp
+
+// CHECK: vcvtss2usi %xmm28, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7e,0x08,0x79,0xec]
+          vcvtss2usi %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7e,0x18,0x79,0xec]
+          vcvtss2usi {rn-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7e,0x58,0x79,0xec]
+          vcvtss2usi {ru-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7e,0x38,0x79,0xec]
+          vcvtss2usi {rd-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %r13d
+// CHECK:  encoding: [0x62,0x11,0x7e,0x78,0x79,0xec]
+          vcvtss2usi {rz-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi (%rcx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x79,0x29]
+          vcvtss2usi (%rcx), %r13d
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvtss2usi 508(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x79,0x6a,0x7f]
+          vcvtss2usi 508(%rdx), %r13d
+
+// CHECK: vcvtss2usi 512(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x79,0xaa,0x00,0x02,0x00,0x00]
+          vcvtss2usi 512(%rdx), %r13d
+
+// CHECK: vcvtss2usi -512(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x79,0x6a,0x80]
+          vcvtss2usi -512(%rdx), %r13d
+
+// CHECK: vcvtss2usi -516(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x79,0xaa,0xfc,0xfd,0xff,0xff]
+          vcvtss2usi -516(%rdx), %r13d
+
+// CHECK: vcvtss2usi %xmm23, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x08,0x79,0xc7]
+          vcvtss2usi %xmm23, %rax
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm23, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x18,0x79,0xc7]
+          vcvtss2usi {rn-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm23, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x58,0x79,0xc7]
+          vcvtss2usi {ru-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm23, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x38,0x79,0xc7]
+          vcvtss2usi {rd-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm23, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x78,0x79,0xc7]
+          vcvtss2usi {rz-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi (%rcx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x79,0x01]
+          vcvtss2usi (%rcx), %rax
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvtss2usi 508(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x79,0x42,0x7f]
+          vcvtss2usi 508(%rdx), %rax
+
+// CHECK: vcvtss2usi 512(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+          vcvtss2usi 512(%rdx), %rax
+
+// CHECK: vcvtss2usi -512(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x79,0x42,0x80]
+          vcvtss2usi -512(%rdx), %rax
+
+// CHECK: vcvtss2usi -516(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+          vcvtss2usi -516(%rdx), %rax
+
+// CHECK: vcvtss2usi %xmm23, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x08,0x79,0xc7]
+          vcvtss2usi %xmm23, %r8
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm23, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x18,0x79,0xc7]
+          vcvtss2usi {rn-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm23, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x58,0x79,0xc7]
+          vcvtss2usi {ru-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm23, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x38,0x79,0xc7]
+          vcvtss2usi {rd-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm23, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x78,0x79,0xc7]
+          vcvtss2usi {rz-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi (%rcx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x79,0x01]
+          vcvtss2usi (%rcx), %r8
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtss2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvtss2usi 508(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x79,0x42,0x7f]
+          vcvtss2usi 508(%rdx), %r8
+
+// CHECK: vcvtss2usi 512(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+          vcvtss2usi 512(%rdx), %r8
+
+// CHECK: vcvtss2usi -512(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x79,0x42,0x80]
+          vcvtss2usi -512(%rdx), %r8
+
+// CHECK: vcvtss2usi -516(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+          vcvtss2usi -516(%rdx), %r8
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x18,0x2c,0xc3]
+          vcvttsd2si {sae}, %xmm3, %eax
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x18,0x2c,0xeb]
+          vcvttsd2si {sae}, %xmm3, %ebp
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x18,0x2c,0xeb]
+          vcvttsd2si {sae}, %xmm3, %r13d
+
+// CHECK: vcvttsd2si {sae}, %xmm1, %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x18,0x2c,0xc1]
+          vcvttsd2si {sae}, %xmm1, %rax
+
+// CHECK: vcvttsd2usi %xmm21, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x78,0xc5]
+          vcvttsd2usi %xmm21, %eax
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x18,0x78,0xc5]
+          vcvttsd2usi {sae}, %xmm21, %eax
+
+// CHECK: vcvttsd2usi (%rcx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x01]
+          vcvttsd2usi (%rcx), %eax
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %eax
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttsd2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvttsd2usi 1016(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x42,0x7f]
+          vcvttsd2usi 1016(%rdx), %eax
+
+// CHECK: vcvttsd2usi 1024(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+          vcvttsd2usi 1024(%rdx), %eax
+
+// CHECK: vcvttsd2usi -1024(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x42,0x80]
+          vcvttsd2usi -1024(%rdx), %eax
+
+// CHECK: vcvttsd2usi -1032(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+          vcvttsd2usi -1032(%rdx), %eax
+
+// CHECK: vcvttsd2usi %xmm21, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x78,0xed]
+          vcvttsd2usi %xmm21, %ebp
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x18,0x78,0xed]
+          vcvttsd2usi {sae}, %xmm21, %ebp
+
+// CHECK: vcvttsd2usi (%rcx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x29]
+          vcvttsd2usi (%rcx), %ebp
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7f,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvttsd2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvttsd2usi 1016(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x6a,0x7f]
+          vcvttsd2usi 1016(%rdx), %ebp
+
+// CHECK: vcvttsd2usi 1024(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0xaa,0x00,0x04,0x00,0x00]
+          vcvttsd2usi 1024(%rdx), %ebp
+
+// CHECK: vcvttsd2usi -1024(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0x6a,0x80]
+          vcvttsd2usi -1024(%rdx), %ebp
+
+// CHECK: vcvttsd2usi -1032(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7f,0x08,0x78,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvttsd2usi -1032(%rdx), %ebp
+
+// CHECK: vcvttsd2usi %xmm21, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7f,0x08,0x78,0xed]
+          vcvttsd2usi %xmm21, %r13d
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7f,0x18,0x78,0xed]
+          vcvttsd2usi {sae}, %xmm21, %r13d
+
+// CHECK: vcvttsd2usi (%rcx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x78,0x29]
+          vcvttsd2usi (%rcx), %r13d
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %r13d
+// CHECK:  encoding: [0x62,0x31,0x7f,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvttsd2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvttsd2usi 1016(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x78,0x6a,0x7f]
+          vcvttsd2usi 1016(%rdx), %r13d
+
+// CHECK: vcvttsd2usi 1024(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x78,0xaa,0x00,0x04,0x00,0x00]
+          vcvttsd2usi 1024(%rdx), %r13d
+
+// CHECK: vcvttsd2usi -1024(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x78,0x6a,0x80]
+          vcvttsd2usi -1024(%rdx), %r13d
+
+// CHECK: vcvttsd2usi -1032(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7f,0x08,0x78,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvttsd2usi -1032(%rdx), %r13d
+
+// CHECK: vcvttsd2usi %xmm7, %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0xc7]
+          vcvttsd2usi %xmm7, %rax
+
+// CHECK: vcvttsd2usi {sae}, %xmm7, %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x18,0x78,0xc7]
+          vcvttsd2usi {sae}, %xmm7, %rax
+
+// CHECK: vcvttsd2usi (%rcx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0x01]
+          vcvttsd2usi (%rcx), %rax
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %rax
+// CHECK:  encoding: [0x62,0xb1,0xff,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttsd2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvttsd2usi 1016(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0x42,0x7f]
+          vcvttsd2usi 1016(%rdx), %rax
+
+// CHECK: vcvttsd2usi 1024(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+          vcvttsd2usi 1024(%rdx), %rax
+
+// CHECK: vcvttsd2usi -1024(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0x42,0x80]
+          vcvttsd2usi -1024(%rdx), %rax
+
+// CHECK: vcvttsd2usi -1032(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xff,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+          vcvttsd2usi -1032(%rdx), %rax
+
+// CHECK: vcvttsd2usi %xmm7, %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0xc7]
+          vcvttsd2usi %xmm7, %r8
+
+// CHECK: vcvttsd2usi {sae}, %xmm7, %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x18,0x78,0xc7]
+          vcvttsd2usi {sae}, %xmm7, %r8
+
+// CHECK: vcvttsd2usi (%rcx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0x01]
+          vcvttsd2usi (%rcx), %r8
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %r8
+// CHECK:  encoding: [0x62,0x31,0xff,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttsd2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvttsd2usi 1016(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0x42,0x7f]
+          vcvttsd2usi 1016(%rdx), %r8
+
+// CHECK: vcvttsd2usi 1024(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+          vcvttsd2usi 1024(%rdx), %r8
+
+// CHECK: vcvttsd2usi -1024(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0x42,0x80]
+          vcvttsd2usi -1024(%rdx), %r8
+
+// CHECK: vcvttsd2usi -1032(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xff,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+          vcvttsd2usi -1032(%rdx), %r8
+
+// CHECK: vcvttss2si {sae}, %xmm14, %eax
+// CHECK:  encoding: [0x62,0xd1,0x7e,0x18,0x2c,0xc6]
+          vcvttss2si {sae}, %xmm14, %eax
+
+// CHECK: vcvttss2si {sae}, %xmm14, %ebp
+// CHECK:  encoding: [0x62,0xd1,0x7e,0x18,0x2c,0xee]
+          vcvttss2si {sae}, %xmm14, %ebp
+
+// CHECK: vcvttss2si {sae}, %xmm14, %r13d
+// CHECK:  encoding: [0x62,0x51,0x7e,0x18,0x2c,0xee]
+          vcvttss2si {sae}, %xmm14, %r13d
+
+// CHECK: vcvttss2si {sae}, %xmm21, %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x18,0x2c,0xc5]
+          vcvttss2si {sae}, %xmm21, %rax
+
+// CHECK: vcvttss2si {sae}, %xmm21, %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x18,0x2c,0xc5]
+          vcvttss2si {sae}, %xmm21, %r8
+
+// CHECK: vcvttss2usi %xmm18, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x78,0xc2]
+          vcvttss2usi %xmm18, %eax
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x18,0x78,0xc2]
+          vcvttss2usi {sae}, %xmm18, %eax
+
+// CHECK: vcvttss2usi (%rcx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x01]
+          vcvttss2usi (%rcx), %eax
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %eax
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttss2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvttss2usi 508(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x42,0x7f]
+          vcvttss2usi 508(%rdx), %eax
+
+// CHECK: vcvttss2usi 512(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+          vcvttss2usi 512(%rdx), %eax
+
+// CHECK: vcvttss2usi -512(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x42,0x80]
+          vcvttss2usi -512(%rdx), %eax
+
+// CHECK: vcvttss2usi -516(%rdx), %eax
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+          vcvttss2usi -516(%rdx), %eax
+
+// CHECK: vcvttss2usi %xmm18, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x78,0xea]
+          vcvttss2usi %xmm18, %ebp
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x18,0x78,0xea]
+          vcvttss2usi {sae}, %xmm18, %ebp
+
+// CHECK: vcvttss2usi (%rcx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x29]
+          vcvttss2usi (%rcx), %ebp
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %ebp
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvttss2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvttss2usi 508(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x6a,0x7f]
+          vcvttss2usi 508(%rdx), %ebp
+
+// CHECK: vcvttss2usi 512(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0xaa,0x00,0x02,0x00,0x00]
+          vcvttss2usi 512(%rdx), %ebp
+
+// CHECK: vcvttss2usi -512(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0x6a,0x80]
+          vcvttss2usi -512(%rdx), %ebp
+
+// CHECK: vcvttss2usi -516(%rdx), %ebp
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x08,0x78,0xaa,0xfc,0xfd,0xff,0xff]
+          vcvttss2usi -516(%rdx), %ebp
+
+// CHECK: vcvttss2usi %xmm18, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x08,0x78,0xea]
+          vcvttss2usi %xmm18, %r13d
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x18,0x78,0xea]
+          vcvttss2usi {sae}, %xmm18, %r13d
+
+// CHECK: vcvttss2usi (%rcx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x78,0x29]
+          vcvttss2usi (%rcx), %r13d
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %r13d
+// CHECK:  encoding: [0x62,0x31,0x7e,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvttss2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvttss2usi 508(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x78,0x6a,0x7f]
+          vcvttss2usi 508(%rdx), %r13d
+
+// CHECK: vcvttss2usi 512(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x78,0xaa,0x00,0x02,0x00,0x00]
+          vcvttss2usi 512(%rdx), %r13d
+
+// CHECK: vcvttss2usi -512(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x78,0x6a,0x80]
+          vcvttss2usi -512(%rdx), %r13d
+
+// CHECK: vcvttss2usi -516(%rdx), %r13d
+// CHECK:  encoding: [0x62,0x71,0x7e,0x08,0x78,0xaa,0xfc,0xfd,0xff,0xff]
+          vcvttss2usi -516(%rdx), %r13d
+
+// CHECK: vcvttss2usi %xmm27, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x08,0x78,0xc3]
+          vcvttss2usi %xmm27, %rax
+
+// CHECK: vcvttss2usi {sae}, %xmm27, %rax
+// CHECK:  encoding: [0x62,0x91,0xfe,0x18,0x78,0xc3]
+          vcvttss2usi {sae}, %xmm27, %rax
+
+// CHECK: vcvttss2usi (%rcx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x78,0x01]
+          vcvttss2usi (%rcx), %rax
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %rax
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttss2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvttss2usi 508(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x78,0x42,0x7f]
+          vcvttss2usi 508(%rdx), %rax
+
+// CHECK: vcvttss2usi 512(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+          vcvttss2usi 512(%rdx), %rax
+
+// CHECK: vcvttss2usi -512(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x78,0x42,0x80]
+          vcvttss2usi -512(%rdx), %rax
+
+// CHECK: vcvttss2usi -516(%rdx), %rax
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+          vcvttss2usi -516(%rdx), %rax
+
+// CHECK: vcvttss2usi %xmm27, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x08,0x78,0xc3]
+          vcvttss2usi %xmm27, %r8
+
+// CHECK: vcvttss2usi {sae}, %xmm27, %r8
+// CHECK:  encoding: [0x62,0x11,0xfe,0x18,0x78,0xc3]
+          vcvttss2usi {sae}, %xmm27, %r8
+
+// CHECK: vcvttss2usi (%rcx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x78,0x01]
+          vcvttss2usi (%rcx), %r8
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %r8
+// CHECK:  encoding: [0x62,0x31,0xfe,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvttss2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvttss2usi 508(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x78,0x42,0x7f]
+          vcvttss2usi 508(%rdx), %r8
+
+// CHECK: vcvttss2usi 512(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+          vcvttss2usi 512(%rdx), %r8
+
+// CHECK: vcvttss2usi -512(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x78,0x42,0x80]
+          vcvttss2usi -512(%rdx), %r8
+
+// CHECK: vcvttss2usi -516(%rdx), %r8
+// CHECK:  encoding: [0x62,0x71,0xfe,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+          vcvttss2usi -516(%rdx), %r8
index 08b365d7f5b2179fa3c6d64cf68deeeb2151cbdd..82c6869b541b202fca3b4733f4848647fb635f54 100644 (file)
 // CHECK:  encoding: [0x62,0xe1,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
           vcvtqq2ps -1032(%rdx){1to8}, %ymm20
 
+// CHECK: vcvtqq2ps %zmm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x48,0x5b,0xe3]
+          vcvtqq2ps %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3}
+// CHECK:  encoding: [0x62,0x21,0xfc,0x4b,0x5b,0xe3]
+          vcvtqq2ps %zmm19, %ymm28 {%k3}
+
+// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x21,0xfc,0xcb,0x5b,0xe3]
+          vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
+
+// CHECK: vcvtqq2ps {rn-sae}, %zmm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x18,0x5b,0xe3]
+          vcvtqq2ps {rn-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {ru-sae}, %zmm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x58,0x5b,0xe3]
+          vcvtqq2ps {ru-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {rd-sae}, %zmm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x38,0x5b,0xe3]
+          vcvtqq2ps {rd-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {rz-sae}, %zmm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x78,0x5b,0xe3]
+          vcvtqq2ps {rz-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x48,0x5b,0x21]
+          vcvtqq2ps (%rcx), %ymm28
+
+// CHECK: vcvtqq2ps 4660(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x48,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
+          vcvtqq2ps 4660(%rax,%r14,8), %ymm28
+
+// CHECK: vcvtqq2ps (%rcx){1to8}, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x58,0x5b,0x21]
+          vcvtqq2ps (%rcx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps 8128(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x7f]
+          vcvtqq2ps 8128(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps 8192(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0x00,0x20,0x00,0x00]
+          vcvtqq2ps 8192(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps -8192(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x80]
+          vcvtqq2ps -8192(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps -8256(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0xc0,0xdf,0xff,0xff]
+          vcvtqq2ps -8256(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to8}, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x7f]
+          vcvtqq2ps 1016(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to8}, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0x00,0x04,0x00,0x00]
+          vcvtqq2ps 1024(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to8}, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x80]
+          vcvtqq2ps -1024(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to8}, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
+          vcvtqq2ps -1032(%rdx){1to8}, %ymm28
+
 // CHECK: vcvtuqq2pd %zmm29, %zmm21
 // CHECK:  encoding: [0x62,0x81,0xfe,0x48,0x7a,0xed]
           vcvtuqq2pd %zmm29, %zmm21
           vfpclasspsl $0x7b,-516(%rdx){1to16}, %k4
 
 
+// CHECK: vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xff,0x18,0x7a,0xd5]
+          vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xff,0x58,0x7a,0xd5]
+          vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xff,0x38,0x7a,0xd5]
+          vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xff,0x78,0x7a,0xd5]
+          vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x7a,0x11]
+          vcvtuqq2ps (%rcx), %ymm18
+
+// CHECK: vcvtuqq2ps 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xff,0x48,0x7a,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vcvtuqq2ps 291(%rax,%r14,8), %ymm18
+
+// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x58,0x7a,0x11]
+          vcvtuqq2ps (%rcx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps 8128(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x7f]
+          vcvtuqq2ps 8128(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps 8192(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0x00,0x20,0x00,0x00]
+          vcvtuqq2ps 8192(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps -8192(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x80]
+          vcvtuqq2ps -8192(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps -8256(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0xc0,0xdf,0xff,0xff]
+          vcvtuqq2ps -8256(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x7f]
+          vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0x00,0x04,0x00,0x00]
+          vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x80]
+          vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0xf8,0xfb,0xff,0xff]
+          vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xff,0x48,0x7a,0xca]
+          vcvtuqq2ps %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2}
+// CHECK:  encoding: [0x62,0x01,0xff,0x4a,0x7a,0xca]
+          vcvtuqq2ps %zmm26, %ymm25 {%k2}
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x01,0xff,0xca,0x7a,0xca]
+          vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
+
+// CHECK: vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xff,0x18,0x7a,0xca]
+          vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xff,0x58,0x7a,0xca]
+          vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xff,0x38,0x7a,0xca]
+          vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xff,0x78,0x7a,0xca]
+          vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7a,0x09]
+          vcvtuqq2ps (%rcx), %ymm25
+
+// CHECK: vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x21,0xff,0x48,0x7a,0x8c,0xf0,0x34,0x12,0x00,0x00]
+          vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
+
+// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x09]
+          vcvtuqq2ps (%rcx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps 8128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x7f]
+          vcvtuqq2ps 8128(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps 8192(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0x00,0x20,0x00,0x00]
+          vcvtuqq2ps 8192(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps -8192(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x80]
+          vcvtuqq2ps -8192(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps -8256(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0xc0,0xdf,0xff,0xff]
+          vcvtuqq2ps -8256(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x7f]
+          vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0x00,0x04,0x00,0x00]
+          vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x80]
+          vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff]
+          vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
index 31c150873de69b48c0a5af6d2e65ab382d63f2c9..54b77e0219032bf5c2bd3570a0fe47a2a140019b 100644 (file)
 // CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x5b,0x9a,0xf8,0xfb,0xff,0xff]
           vcvtqq2ps -1032(%rdx){1to4}, %xmm27
 
+// CHECK: vcvtqq2ps %xmm26, %xmm30
+// CHECK:  encoding: [0x62,0x01,0xfc,0x08,0x5b,0xf2]
+          vcvtqq2ps %xmm26, %xmm30
+
+// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4}
+// CHECK:  encoding: [0x62,0x01,0xfc,0x0c,0x5b,0xf2]
+          vcvtqq2ps %xmm26, %xmm30 {%k4}
+
+// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x01,0xfc,0x8c,0x5b,0xf2]
+          vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
+
+// CHECK: vcvtqq2psx (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x08,0x5b,0x31]
+          vcvtqq2psx (%rcx), %xmm30
+
+// CHECK: vcvtqq2psx 4660(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x21,0xfc,0x08,0x5b,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vcvtqq2psx 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vcvtqq2ps (%rcx){1to2}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x18,0x5b,0x31]
+          vcvtqq2ps (%rcx){1to2}, %xmm30
+
+// CHECK: vcvtqq2psx 2032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x7f]
+          vcvtqq2psx 2032(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx 2048(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0x00,0x08,0x00,0x00]
+          vcvtqq2psx 2048(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx -2048(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x80]
+          vcvtqq2psx -2048(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx -2064(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0xf0,0xf7,0xff,0xff]
+          vcvtqq2psx -2064(%rdx), %xmm30
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to2}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x7f]
+          vcvtqq2ps 1016(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to2}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0x00,0x04,0x00,0x00]
+          vcvtqq2ps 1024(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to2}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x80]
+          vcvtqq2ps -1024(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to2}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0xf8,0xfb,0xff,0xff]
+          vcvtqq2ps -1032(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20
+// CHECK:  encoding: [0x62,0x81,0xfc,0x28,0x5b,0xe4]
+          vcvtqq2ps %ymm28, %xmm20
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3}
+// CHECK:  encoding: [0x62,0x81,0xfc,0x2b,0x5b,0xe4]
+          vcvtqq2ps %ymm28, %xmm20 {%k3}
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0xfc,0xab,0x5b,0xe4]
+          vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
+
+// CHECK: vcvtqq2psy (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x21]
+          vcvtqq2psy (%rcx), %xmm20
+
+// CHECK: vcvtqq2psy 4660(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x28,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
+          vcvtqq2psy 4660(%rax,%r14,8), %xmm20
+
+// CHECK: vcvtqq2ps (%rcx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x21]
+          vcvtqq2ps (%rcx){1to4}, %xmm20
+
+// CHECK: vcvtqq2psy 4064(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x7f]
+          vcvtqq2psy 4064(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy 4096(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0x00,0x10,0x00,0x00]
+          vcvtqq2psy 4096(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy -4096(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x80]
+          vcvtqq2psy -4096(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy -4128(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0xe0,0xef,0xff,0xff]
+          vcvtqq2psy -4128(%rdx), %xmm20
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x7f]
+          vcvtqq2ps 1016(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0x00,0x04,0x00,0x00]
+          vcvtqq2ps 1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x80]
+          vcvtqq2ps -1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
+          vcvtqq2ps -1032(%rdx){1to4}, %xmm20
+
 // CHECK: vcvtuqq2pd %xmm20, %xmm19
 // CHECK:  encoding: [0x62,0xa1,0xfe,0x08,0x7a,0xdc]
           vcvtuqq2pd %xmm20, %xmm19
 // CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0xa2,0xf8,0xfb,0xff,0xff]
           vcvtuqq2ps -1032(%rdx){1to4}, %xmm28
 
+// CHECK: vcvtuqq2ps %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa1,0xff,0x08,0x7a,0xee]
+          vcvtuqq2ps %xmm22, %xmm21
+
+// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0xff,0x0e,0x7a,0xee]
+          vcvtuqq2ps %xmm22, %xmm21 {%k6}
+
+// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0xff,0x8e,0x7a,0xee]
+          vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
+
+// CHECK: vcvtuqq2psx (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7a,0x29]
+          vcvtuqq2psx (%rcx), %xmm21
+
+// CHECK: vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa1,0xff,0x08,0x7a,0xac,0xf0,0x34,0x12,0x00,0x00]
+          vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
+
+// CHECK: vcvtuqq2ps (%rcx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x18,0x7a,0x29]
+          vcvtuqq2ps (%rcx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2psx 2032(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x7f]
+          vcvtuqq2psx 2032(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx 2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0x00,0x08,0x00,0x00]
+          vcvtuqq2psx 2048(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx -2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x80]
+          vcvtuqq2psx -2048(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx -2064(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0xf0,0xf7,0xff,0xff]
+          vcvtuqq2psx -2064(%rdx), %xmm21
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x7f]
+          vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0x00,0x04,0x00,0x00]
+          vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x80]
+          vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26
+// CHECK:  encoding: [0x62,0x21,0xff,0x28,0x7a,0xd1]
+          vcvtuqq2ps %ymm17, %xmm26
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4}
+// CHECK:  encoding: [0x62,0x21,0xff,0x2c,0x7a,0xd1]
+          vcvtuqq2ps %ymm17, %xmm26 {%k4}
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
+// CHECK:  encoding: [0x62,0x21,0xff,0xac,0x7a,0xd1]
+          vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
+
+// CHECK: vcvtuqq2psy (%rcx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7a,0x11]
+          vcvtuqq2psy (%rcx), %xmm26
+
+// CHECK: vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
+// CHECK:  encoding: [0x62,0x21,0xff,0x28,0x7a,0x94,0xf0,0x34,0x12,0x00,0x00]
+          vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
+
+// CHECK: vcvtuqq2ps (%rcx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0x11]
+          vcvtuqq2ps (%rcx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2psy 4064(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x7f]
+          vcvtuqq2psy 4064(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy 4096(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0x00,0x10,0x00,0x00]
+          vcvtuqq2psy 4096(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy -4096(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x80]
+          vcvtuqq2psy -4096(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy -4128(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0xe0,0xef,0xff,0xff]
+          vcvtuqq2psy -4128(%rdx), %xmm26
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x7f]
+          vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0x00,0x04,0x00,0x00]
+          vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x80]
+          vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0xf8,0xfb,0xff,0xff]
+          vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
+
 // CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21
 // CHECK:  encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0xab]
           vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21
index b0d63dcf4da8b86b5a4fa914311ed0c72e037b62..21c3faa90ef4746f0418db0aa33c2b82f753f521 100644 (file)
@@ -20523,3 +20523,338 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00]
           vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1}
 
+// CHECK: vcvtpd2dq %xmm20, %xmm25
+// CHECK:  encoding: [0x62,0x21,0xff,0x08,0xe6,0xcc]
+          vcvtpd2dq %xmm20, %xmm25
+
+// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2}
+// CHECK:  encoding: [0x62,0x21,0xff,0x0a,0xe6,0xcc]
+          vcvtpd2dq %xmm20, %xmm25 {%k2}
+
+// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0xff,0x8a,0xe6,0xcc]
+          vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
+
+// CHECK: vcvtpd2dqx (%rcx), %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0xe6,0x09]
+          vcvtpd2dqx (%rcx), %xmm25
+
+// CHECK: vcvtpd2dqx 291(%rax,%r14,8), %xmm25
+// CHECK:  encoding: [0x62,0x21,0xff,0x08,0xe6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2dqx 291(%rax,%r14,8), %xmm25
+
+// CHECK: vcvtpd2dq (%rcx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x18,0xe6,0x09]
+          vcvtpd2dq (%rcx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dqx 2032(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x7f]
+          vcvtpd2dqx 2032(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx 2048(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0x00,0x08,0x00,0x00]
+          vcvtpd2dqx 2048(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx -2048(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x80]
+          vcvtpd2dqx -2048(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx -2064(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0xf0,0xf7,0xff,0xff]
+          vcvtpd2dqx -2064(%rdx), %xmm25
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x7f]
+          vcvtpd2dq 1016(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0x00,0x04,0x00,0x00]
+          vcvtpd2dq 1024(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x80]
+          vcvtpd2dq -1024(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0xf8,0xfb,0xff,0xff]
+          vcvtpd2dq -1032(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30
+// CHECK:  encoding: [0x62,0x01,0xff,0x28,0xe6,0xf4]
+          vcvtpd2dq %ymm28, %xmm30
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3}
+// CHECK:  encoding: [0x62,0x01,0xff,0x2b,0xe6,0xf4]
+          vcvtpd2dq %ymm28, %xmm30 {%k3}
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0xff,0xab,0xe6,0xf4]
+          vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
+
+// CHECK: vcvtpd2dqy (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0xe6,0x31]
+          vcvtpd2dqy (%rcx), %xmm30
+
+// CHECK: vcvtpd2dqy 291(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x21,0xff,0x28,0xe6,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2dqy 291(%rax,%r14,8), %xmm30
+
+// CHECK: vcvtpd2dq (%rcx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0xe6,0x31]
+          vcvtpd2dq (%rcx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dqy 4064(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x7f]
+          vcvtpd2dqy 4064(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy 4096(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0x00,0x10,0x00,0x00]
+          vcvtpd2dqy 4096(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy -4096(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x80]
+          vcvtpd2dqy -4096(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy -4128(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0xe0,0xef,0xff,0xff]
+          vcvtpd2dqy -4128(%rdx), %xmm30
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x7f]
+          vcvtpd2dq 1016(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0x00,0x04,0x00,0x00]
+          vcvtpd2dq 1024(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x80]
+          vcvtpd2dq -1024(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to4}, %xmm30
+// CHECK:  encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0xf8,0xfb,0xff,0xff]
+          vcvtpd2dq -1032(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27
+// CHECK:  encoding: [0x62,0x01,0xfd,0x08,0x5a,0xdb]
+          vcvtpd2ps %xmm27, %xmm27
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x0f,0x5a,0xdb]
+          vcvtpd2ps %xmm27, %xmm27 {%k7}
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x8f,0x5a,0xdb]
+          vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
+
+// CHECK: vcvtpd2psx (%rcx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x5a,0x19]
+          vcvtpd2psx (%rcx), %xmm27
+
+// CHECK: vcvtpd2psx 291(%rax,%r14,8), %xmm27
+// CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x5a,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2psx 291(%rax,%r14,8), %xmm27
+
+// CHECK: vcvtpd2ps (%rcx){1to2}, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x5a,0x19]
+          vcvtpd2ps (%rcx){1to2}, %xmm27
+
+// CHECK: vcvtpd2psx 2032(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x7f]
+          vcvtpd2psx 2032(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx 2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0x00,0x08,0x00,0x00]
+          vcvtpd2psx 2048(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx -2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x80]
+          vcvtpd2psx -2048(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx -2064(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0xf0,0xf7,0xff,0xff]
+          vcvtpd2psx -2064(%rdx), %xmm27
+
+// CHECK: vcvtpd2ps 1016(%rdx){1to2}, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x7f]
+          vcvtpd2ps 1016(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps 1024(%rdx){1to2}, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0x00,0x04,0x00,0x00]
+          vcvtpd2ps 1024(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps -1024(%rdx){1to2}, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x80]
+          vcvtpd2ps -1024(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps -1032(%rdx){1to2}, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0xf8,0xfb,0xff,0xff]
+          vcvtpd2ps -1032(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xe4]
+          vcvtpd2ps %ymm20, %xmm20
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x2e,0x5a,0xe4]
+          vcvtpd2ps %ymm20, %xmm20 {%k6}
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0xae,0x5a,0xe4]
+          vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
+
+// CHECK: vcvtpd2psy (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x21]
+          vcvtpd2psy (%rcx), %xmm20
+
+// CHECK: vcvtpd2psy 291(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtpd2psy 291(%rax,%r14,8), %xmm20
+
+// CHECK: vcvtpd2ps (%rcx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x21]
+          vcvtpd2ps (%rcx){1to4}, %xmm20
+
+// CHECK: vcvtpd2psy 4064(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x7f]
+          vcvtpd2psy 4064(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy 4096(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0x00,0x10,0x00,0x00]
+          vcvtpd2psy 4096(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy -4096(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x80]
+          vcvtpd2psy -4096(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy -4128(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0xe0,0xef,0xff,0xff]
+          vcvtpd2psy -4128(%rdx), %xmm20
+
+// CHECK: vcvtpd2ps 1016(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x7f]
+          vcvtpd2ps 1016(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps 1024(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0x00,0x04,0x00,0x00]
+          vcvtpd2ps 1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps -1024(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x80]
+          vcvtpd2ps -1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps -1032(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0xf8,0xfb,0xff,0xff]
+          vcvtpd2ps -1032(%rdx){1to4}, %xmm20
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0x81,0xfc,0x08,0x78,0xfa]
+          vcvttpd2udq %xmm26, %xmm23
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2}
+// CHECK:  encoding: [0x62,0x81,0xfc,0x0a,0x78,0xfa]
+          vcvttpd2udq %xmm26, %xmm23 {%k2}
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0xfc,0x8a,0x78,0xfa]
+          vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
+
+// CHECK: vcvttpd2udqx (%rcx), %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x08,0x78,0x39]
+          vcvttpd2udqx (%rcx), %xmm23
+
+// CHECK: vcvttpd2udqx 291(%rax,%r14,8), %xmm23
+// CHECK:  encoding: [0x62,0xa1,0xfc,0x08,0x78,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcvttpd2udqx 291(%rax,%r14,8), %xmm23
+
+// CHECK: vcvttpd2udq (%rcx){1to2}, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x18,0x78,0x39]
+          vcvttpd2udq (%rcx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udqx 2032(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x7f]
+          vcvttpd2udqx 2032(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx 2048(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0x00,0x08,0x00,0x00]
+          vcvttpd2udqx 2048(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx -2048(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x80]
+          vcvttpd2udqx -2048(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx -2064(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0xf0,0xf7,0xff,0xff]
+          vcvttpd2udqx -2064(%rdx), %xmm23
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to2}, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x7f]
+          vcvttpd2udq 1016(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to2}, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0x00,0x04,0x00,0x00]
+          vcvttpd2udq 1024(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to2}, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x80]
+          vcvttpd2udq -1024(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to2}, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0xf8,0xfb,0xff,0xff]
+          vcvttpd2udq -1032(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x28,0x78,0xe7]
+          vcvttpd2udq %ymm23, %xmm28
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6}
+// CHECK:  encoding: [0x62,0x21,0xfc,0x2e,0x78,0xe7]
+          vcvttpd2udq %ymm23, %xmm28 {%k6}
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0xfc,0xae,0x78,0xe7]
+          vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
+
+// CHECK: vcvttpd2udqy (%rcx), %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x28,0x78,0x21]
+          vcvttpd2udqy (%rcx), %xmm28
+
+// CHECK: vcvttpd2udqy 291(%rax,%r14,8), %xmm28
+// CHECK:  encoding: [0x62,0x21,0xfc,0x28,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vcvttpd2udqy 291(%rax,%r14,8), %xmm28
+
+// CHECK: vcvttpd2udq (%rcx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x78,0x21]
+          vcvttpd2udq (%rcx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udqy 4064(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x7f]
+          vcvttpd2udqy 4064(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy 4096(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0x00,0x10,0x00,0x00]
+          vcvttpd2udqy 4096(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy -4096(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x80]
+          vcvttpd2udqy -4096(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy -4128(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0xe0,0xef,0xff,0xff]
+          vcvttpd2udqy -4128(%rdx), %xmm28
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x7f]
+          vcvttpd2udq 1016(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0x00,0x04,0x00,0x00]
+          vcvttpd2udq 1024(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x80]
+          vcvttpd2udq -1024(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0xf8,0xfb,0xff,0xff]
+          vcvttpd2udq -1032(%rdx){1to4}, %xmm28