From 23dc4bbf1da37a4a80b38ddd564c4a0c783d0ee5 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 3 Jun 2015 10:56:40 +0000 Subject: [PATCH] AVX-512: Implemented SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2 instructions for SKX and KNL. Added tests for encoding. By Igor Breger (igor.breger@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238917 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 1 + lib/Target/X86/X86ISelLowering.h | 2 + lib/Target/X86/X86InstrAVX512.td | 64 ++++++- lib/Target/X86/X86InstrFragmentsSIMD.td | 3 +- test/MC/X86/avx512-encodings.s | 240 ++++++++++++++++++++++++ test/MC/X86/x86-64-avx512f_vl.s | 240 ++++++++++++++++++++++++ 6 files changed, 541 insertions(+), 9 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index efc2f497ba9..5e1441ff7f8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18301,6 +18301,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; case X86ISD::SHUFP: return "X86ISD::SHUFP"; + case X86ISD::SHUF128: return "X86ISD::SHUF128"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a403ee830bb..fc412cd9468 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -354,6 +354,8 @@ namespace llvm { PSHUFHW, PSHUFLW, SHUFP, + //Shuffle Packed Values at 128-bit granularity + SHUF128, MOVDDUP, MOVSHDUP, MOVSLDUP, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 15fd0601652..24c72001197 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5996,6 +5996,34 @@ multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, } } +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_scalar,imm) //all instruction created with FROUND_CURRENT @@ -6048,18 +6076,18 @@ multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ - let Predicates = [prd] in { - defm Z : avx512_fp_packed_imm, + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm, avx512_fp_sae_packed_imm, EVEX_V512; - } - let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_fp_packed_imm, + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm, EVEX_V128; - defm Z256 : avx512_fp_packed_imm, + defm Z256 : avx512_fp_packed_imm, EVEX_V256; - } + } } multiclass avx512_common_fp_sae_scalar_imm, avx512_fp_sae_scalar_imm; - } + } } defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", @@ -6098,3 +6126,23 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8, EVEX_V512; + + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8, EVEX_V256; + } +} + +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 488dfc7a88d..bb894bb9f3b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -249,7 +249,8 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>; def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index d2ec6bdc307..4232fff5e95 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -8016,3 +8016,243 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0xfc,0xfd,0xff,0xff,0x7b] vpshufd $123, -516(%rdx){1to16}, %zmm19 +// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0xab] + vshuff32x4 $171, %zmm3, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} +// CHECK: encoding: [0x62,0xf3,0x3d,0x42,0x23,0xf3,0xab] + vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} + +// CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z} +// CHECK: encoding: [0x62,0xf3,0x3d,0xc2,0x23,0xf3,0xab] + vshuff32x4 $171, %zmm3, %zmm24, %zmm6 {%k2} {z} + +// CHECK: vshuff32x4 $123, %zmm3, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0x7b] + vshuff32x4 $123, %zmm3, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, (%rcx), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x31,0x7b] + vshuff32x4 $123, (%rcx), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xb3,0x3d,0x40,0x23,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshuff32x4 $123, 291(%rax,%r14,8), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x31,0x7b] + vshuff32x4 $123, (%rcx){1to16}, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x7f,0x7b] + vshuff32x4 $123, 8128(%rdx), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0x00,0x20,0x00,0x00,0x7b] + vshuff32x4 $123, 8192(%rdx), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0x72,0x80,0x7b] + vshuff32x4 $123, -8192(%rdx), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xb2,0xc0,0xdf,0xff,0xff,0x7b] + vshuff32x4 $123, -8256(%rdx), %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x7f,0x7b] + vshuff32x4 $123, 508(%rdx){1to16}, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0x00,0x02,0x00,0x00,0x7b] + vshuff32x4 $123, 512(%rdx){1to16}, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0x72,0x80,0x7b] + vshuff32x4 $123, -512(%rdx){1to16}, %zmm24, %zmm6 + +// CHECK: vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6 +// CHECK: encoding: [0x62,0xf3,0x3d,0x50,0x23,0xb2,0xfc,0xfd,0xff,0xff,0x7b] + vshuff32x4 $123, -516(%rdx){1to16}, %zmm24, %zmm6 + +// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0xab] + vshuff64x2 $171, %zmm11, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} +// CHECK: encoding: [0x62,0x53,0xb5,0x42,0x23,0xfb,0xab] + vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} + +// CHECK: vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z} +// CHECK: encoding: [0x62,0x53,0xb5,0xc2,0x23,0xfb,0xab] + vshuff64x2 $171, %zmm11, %zmm25, %zmm15 {%k2} {z} + +// CHECK: vshuff64x2 $123, %zmm11, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x53,0xb5,0x40,0x23,0xfb,0x7b] + vshuff64x2 $123, %zmm11, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, (%rcx), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x39,0x7b] + vshuff64x2 $123, (%rcx), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x33,0xb5,0x40,0x23,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshuff64x2 $123, 291(%rax,%r14,8), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x39,0x7b] + vshuff64x2 $123, (%rcx){1to8}, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x7f,0x7b] + vshuff64x2 $123, 8128(%rdx), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0x00,0x20,0x00,0x00,0x7b] + vshuff64x2 $123, 8192(%rdx), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0x7a,0x80,0x7b] + vshuff64x2 $123, -8192(%rdx), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x40,0x23,0xba,0xc0,0xdf,0xff,0xff,0x7b] + vshuff64x2 $123, -8256(%rdx), %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x7f,0x7b] + vshuff64x2 $123, 1016(%rdx){1to8}, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0x00,0x04,0x00,0x00,0x7b] + vshuff64x2 $123, 1024(%rdx){1to8}, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0x7a,0x80,0x7b] + vshuff64x2 $123, -1024(%rdx){1to8}, %zmm25, %zmm15 + +// CHECK: vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15 +// CHECK: encoding: [0x62,0x73,0xb5,0x50,0x23,0xba,0xf8,0xfb,0xff,0xff,0x7b] + vshuff64x2 $123, -1032(%rdx){1to8}, %zmm25, %zmm15 + +// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0xab] + vshufi32x4 $171, %zmm25, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} +// CHECK: encoding: [0x62,0x93,0x1d,0x44,0x43,0xc9,0xab] + vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} + +// CHECK: vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z} +// CHECK: encoding: [0x62,0x93,0x1d,0xc4,0x43,0xc9,0xab] + vshufi32x4 $171, %zmm25, %zmm28, %zmm1 {%k4} {z} + +// CHECK: vshufi32x4 $123, %zmm25, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0x93,0x1d,0x40,0x43,0xc9,0x7b] + vshufi32x4 $123, %zmm25, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, (%rcx), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x09,0x7b] + vshufi32x4 $123, (%rcx), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xb3,0x1d,0x40,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshufi32x4 $123, 291(%rax,%r14,8), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x09,0x7b] + vshufi32x4 $123, (%rcx){1to16}, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x7f,0x7b] + vshufi32x4 $123, 8128(%rdx), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0x00,0x20,0x00,0x00,0x7b] + vshufi32x4 $123, 8192(%rdx), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x4a,0x80,0x7b] + vshufi32x4 $123, -8192(%rdx), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x40,0x43,0x8a,0xc0,0xdf,0xff,0xff,0x7b] + vshufi32x4 $123, -8256(%rdx), %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x7f,0x7b] + vshufi32x4 $123, 508(%rdx){1to16}, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0x00,0x02,0x00,0x00,0x7b] + vshufi32x4 $123, 512(%rdx){1to16}, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x4a,0x80,0x7b] + vshufi32x4 $123, -512(%rdx){1to16}, %zmm28, %zmm1 + +// CHECK: vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1 +// CHECK: encoding: [0x62,0xf3,0x1d,0x50,0x43,0x8a,0xfc,0xfd,0xff,0xff,0x7b] + vshufi32x4 $123, -516(%rdx){1to16}, %zmm28, %zmm1 + +// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0xab] + vshufi64x2 $171, %zmm19, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} +// CHECK: encoding: [0x62,0xb3,0xfd,0x47,0x43,0xdb,0xab] + vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} + +// CHECK: vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z} +// CHECK: encoding: [0x62,0xb3,0xfd,0xc7,0x43,0xdb,0xab] + vshufi64x2 $171, %zmm19, %zmm16, %zmm3 {%k7} {z} + +// CHECK: vshufi64x2 $123, %zmm19, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0xdb,0x7b] + vshufi64x2 $123, %zmm19, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, (%rcx), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x19,0x7b] + vshufi64x2 $123, (%rcx), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x40,0x43,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshufi64x2 $123, 291(%rax,%r14,8), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x19,0x7b] + vshufi64x2 $123, (%rcx){1to8}, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x7f,0x7b] + vshufi64x2 $123, 8128(%rdx), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0x00,0x20,0x00,0x00,0x7b] + vshufi64x2 $123, 8192(%rdx), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x5a,0x80,0x7b] + vshufi64x2 $123, -8192(%rdx), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x40,0x43,0x9a,0xc0,0xdf,0xff,0xff,0x7b] + vshufi64x2 $123, -8256(%rdx), %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x7f,0x7b] + vshufi64x2 $123, 1016(%rdx){1to8}, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0x00,0x04,0x00,0x00,0x7b] + vshufi64x2 $123, 1024(%rdx){1to8}, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x5a,0x80,0x7b] + vshufi64x2 $123, -1024(%rdx){1to8}, %zmm16, %zmm3 + +// CHECK: vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3 + diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 24caa456724..dd2a49d7046 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -10549,3 +10549,243 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0xfc,0xfd,0xff,0xff,0x7b] vpshufd $123, -516(%rdx){1to8}, %ymm20 +// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0xab] + vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7} +// CHECK: encoding: [0x62,0x23,0x25,0x27,0x23,0xea,0xab] + vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7} + +// CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 {%k7} {z} +// CHECK: encoding: [0x62,0x23,0x25,0xa7,0x23,0xea,0xab] + vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29 {%k7} {z} + +// CHECK: vshuff32x4 $123, %ymm18, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0x7b] + vshuff32x4 $0x7b, %ymm18, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, (%rcx), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x29,0x7b] + vshuff32x4 $0x7b, (%rcx), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshuff32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, (%rcx){1to8}, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x29,0x7b] + vshuff32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, 4064(%rdx), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x7f,0x7b] + vshuff32x4 $0x7b, 4064(%rdx), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, 4096(%rdx), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0x00,0x10,0x00,0x00,0x7b] + vshuff32x4 $0x7b, 4096(%rdx), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, -4096(%rdx), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0x6a,0x80,0x7b] + vshuff32x4 $0x7b, -4096(%rdx), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, -4128(%rdx), %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x20,0x23,0xaa,0xe0,0xef,0xff,0xff,0x7b] + vshuff32x4 $0x7b, -4128(%rdx), %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x7f,0x7b] + vshuff32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0x00,0x02,0x00,0x00,0x7b] + vshuff32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0x6a,0x80,0x7b] + vshuff32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm29 + +// CHECK: vshuff32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm29 +// CHECK: encoding: [0x62,0x63,0x25,0x30,0x23,0xaa,0xfc,0xfd,0xff,0xff,0x7b] + vshuff32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm29 + +// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0xab] + vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5} +// CHECK: encoding: [0x62,0xa3,0xed,0x25,0x23,0xd4,0xab] + vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5} + +// CHECK: vshuff64x2 $171, %ymm20, %ymm18, %ymm18 {%k5} {z} +// CHECK: encoding: [0x62,0xa3,0xed,0xa5,0x23,0xd4,0xab] + vshuff64x2 $0xab, %ymm20, %ymm18, %ymm18 {%k5} {z} + +// CHECK: vshuff64x2 $123, %ymm20, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0xd4,0x7b] + vshuff64x2 $0x7b, %ymm20, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, (%rcx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x11,0x7b] + vshuff64x2 $0x7b, (%rcx), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, 291(%rax,%r14,8), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xa3,0xed,0x20,0x23,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshuff64x2 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, (%rcx){1to4}, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x11,0x7b] + vshuff64x2 $0x7b, (%rcx){1to4}, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, 4064(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x7f,0x7b] + vshuff64x2 $0x7b, 4064(%rdx), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, 4096(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0x00,0x10,0x00,0x00,0x7b] + vshuff64x2 $0x7b, 4096(%rdx), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, -4096(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x52,0x80,0x7b] + vshuff64x2 $0x7b, -4096(%rdx), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, -4128(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x20,0x23,0x92,0xe0,0xef,0xff,0xff,0x7b] + vshuff64x2 $0x7b, -4128(%rdx), %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, 1016(%rdx){1to4}, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x7f,0x7b] + vshuff64x2 $0x7b, 1016(%rdx){1to4}, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, 1024(%rdx){1to4}, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0x00,0x04,0x00,0x00,0x7b] + vshuff64x2 $0x7b, 1024(%rdx){1to4}, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, -1024(%rdx){1to4}, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x52,0x80,0x7b] + vshuff64x2 $0x7b, -1024(%rdx){1to4}, %ymm18, %ymm18 + +// CHECK: vshuff64x2 $123, -1032(%rdx){1to4}, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0xed,0x30,0x23,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vshuff64x2 $0x7b, -1032(%rdx){1to4}, %ymm18, %ymm18 + +// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0xab] + vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7} +// CHECK: encoding: [0x62,0xa3,0x25,0x27,0x43,0xd1,0xab] + vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7} + +// CHECK: vshufi32x4 $171, %ymm17, %ymm27, %ymm18 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x25,0xa7,0x43,0xd1,0xab] + vshufi32x4 $0xab, %ymm17, %ymm27, %ymm18 {%k7} {z} + +// CHECK: vshufi32x4 $123, %ymm17, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0xd1,0x7b] + vshufi32x4 $0x7b, %ymm17, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, (%rcx), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x11,0x7b] + vshufi32x4 $0x7b, (%rcx), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, 291(%rax,%r14,8), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xa3,0x25,0x20,0x43,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshufi32x4 $0x7b, 291(%rax,%r14,8), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, (%rcx){1to8}, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x11,0x7b] + vshufi32x4 $0x7b, (%rcx){1to8}, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, 4064(%rdx), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x7f,0x7b] + vshufi32x4 $0x7b, 4064(%rdx), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, 4096(%rdx), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0x00,0x10,0x00,0x00,0x7b] + vshufi32x4 $0x7b, 4096(%rdx), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, -4096(%rdx), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x52,0x80,0x7b] + vshufi32x4 $0x7b, -4096(%rdx), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, -4128(%rdx), %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x20,0x43,0x92,0xe0,0xef,0xff,0xff,0x7b] + vshufi32x4 $0x7b, -4128(%rdx), %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, 508(%rdx){1to8}, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x7f,0x7b] + vshufi32x4 $0x7b, 508(%rdx){1to8}, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, 512(%rdx){1to8}, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0x00,0x02,0x00,0x00,0x7b] + vshufi32x4 $0x7b, 512(%rdx){1to8}, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, -512(%rdx){1to8}, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x52,0x80,0x7b] + vshufi32x4 $0x7b, -512(%rdx){1to8}, %ymm27, %ymm18 + +// CHECK: vshufi32x4 $123, -516(%rdx){1to8}, %ymm27, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x25,0x30,0x43,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vshufi32x4 $0x7b, -516(%rdx){1to8}, %ymm27, %ymm18 + +// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0xab] + vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3} +// CHECK: encoding: [0x62,0x23,0xad,0x23,0x43,0xcd,0xab] + vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3} + +// CHECK: vshufi64x2 $171, %ymm21, %ymm26, %ymm25 {%k3} {z} +// CHECK: encoding: [0x62,0x23,0xad,0xa3,0x43,0xcd,0xab] + vshufi64x2 $0xab, %ymm21, %ymm26, %ymm25 {%k3} {z} + +// CHECK: vshufi64x2 $123, %ymm21, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0xcd,0x7b] + vshufi64x2 $0x7b, %ymm21, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, (%rcx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x09,0x7b] + vshufi64x2 $0x7b, (%rcx), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, 291(%rax,%r14,8), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x23,0xad,0x20,0x43,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vshufi64x2 $0x7b, 291(%rax,%r14,8), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, (%rcx){1to4}, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x09,0x7b] + vshufi64x2 $0x7b, (%rcx){1to4}, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, 4064(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x7f,0x7b] + vshufi64x2 $0x7b, 4064(%rdx), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, 4096(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0x00,0x10,0x00,0x00,0x7b] + vshufi64x2 $0x7b, 4096(%rdx), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, -4096(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x4a,0x80,0x7b] + vshufi64x2 $0x7b, -4096(%rdx), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, -4128(%rdx), %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x20,0x43,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vshufi64x2 $0x7b, -4128(%rdx), %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, 1016(%rdx){1to4}, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x7f,0x7b] + vshufi64x2 $0x7b, 1016(%rdx){1to4}, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, 1024(%rdx){1to4}, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0x00,0x04,0x00,0x00,0x7b] + vshufi64x2 $0x7b, 1024(%rdx){1to4}, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, -1024(%rdx){1to4}, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x4a,0x80,0x7b] + vshufi64x2 $0x7b, -1024(%rdx){1to4}, %ymm26, %ymm25 + +// CHECK: vshufi64x2 $123, -1032(%rdx){1to4}, %ymm26, %ymm25 +// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vshufi64x2 $0x7b, -1032(%rdx){1to4}, %ymm26, %ymm25 + -- 2.34.1