[X86][AVX512VLBW] add support in byte shift and SAD
authorAsaf Badouh <asaf.badouh@intel.com>
Wed, 2 Sep 2015 14:21:54 +0000 (14:21 +0000)
committerAsaf Badouh <asaf.badouh@intel.com>
Wed, 2 Sep 2015 14:21:54 +0000 (14:21 +0000)
add byte shift left/right
add SAD - compute sum of absolute differences

Differential Revision: http://reviews.llvm.org/D12479

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246654 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrSSE.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx-isa-check.ll
test/CodeGen/X86/avx512bw-intrinsics.ll
test/CodeGen/X86/avx512bwvl-intrinsics.ll
test/MC/X86/x86-64-avx512bw.s
test/MC/X86/x86-64-avx512bw_vl.s

index 12165341130f28ce8c6ccefe42c1c47eb3ed6e4d..c9d584bd4ae4abd14d75f6c8804f83a7efb22237 100644 (file)
@@ -2356,6 +2356,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
                          llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], 
                         [IntrNoMem]>;
+  def int_x86_avx512_psll_dq_512 : GCCBuiltin<"__builtin_ia32_pslldq512">,
+              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], 
+                        [IntrNoMem]>;
+  def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
+              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], 
+                        [IntrNoMem]>;                        
 }
 
 // Gather ops
@@ -4981,6 +4987,9 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
             Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                                         llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
                       [IntrNoMem]>;
+def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
+            Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
+                      [IntrNoMem]>;
 }
 // FP logical ops
 let TargetPrefix = "x86" in {
index 560e8934ce6bf1082f977980f678ffe5190826eb..b715cd5931c9aedf1ce3d24c8d2a5294b14f8c55 100644 (file)
@@ -15576,6 +15576,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
     case INTR_TYPE_2OP:
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
         Op.getOperand(2));
+    case INTR_TYPE_2OP_IMM8:
+      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+                         DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2)));
     case INTR_TYPE_3OP:
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
         Op.getOperand(2), Op.getOperand(3));
index ebc11dc433064eb9364319b0fc9d2524b1403525..d93deaec55d57739fa688563371ab54d67b810c1 100644 (file)
@@ -6822,3 +6822,71 @@ multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
 
 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - Byte shift Left/Right
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
+                             Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
+  def rr : AVX512<opc, MRMr,
+             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
+  let mayLoad = 1 in
+    def rm : AVX512<opc, MRMm,
+             (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _.RC:$dst,(_.VT (OpNode 
+                                   (_.LdFrag addr:$src1), (i8 imm:$src2))))]>;
+}
+
+multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, 
+                                 Format MRMm, string OpcodeStr, Predicate prd>{
+  let Predicates = [prd] in
+    defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, 
+                                    OpcodeStr, v8i64_info>, EVEX_V512;
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, 
+                                    OpcodeStr, v4i64x_info>, EVEX_V256;
+    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, 
+                                    OpcodeStr, v2i64x_info>, EVEX_V128;
+  }
+}
+defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", 
+                                       HasBWI>, AVX512PDIi8Base, EVEX_4V;
+defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", 
+                                       HasBWI>, AVX512PDIi8Base, EVEX_4V;
+
+
+multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, 
+                                string OpcodeStr, X86VectorVTInfo _src>{
+  def rr : AVX512BI<opc, MRMSrcReg,
+             (outs _src.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _src.RC:$dst,(_src.VT 
+                                (OpNode _src.RC:$src1, _src.RC:$src2)))]>;
+  let mayLoad = 1 in
+    def rm : AVX512BI<opc, MRMSrcMem,
+             (outs _src.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set _src.RC:$dst,(_src.VT 
+                                (OpNode _src.RC:$src1, 
+                                (_src.VT (bitconvert 
+                                          (_src.LdFrag addr:$src2))))))]>;
+}
+
+multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 
+                                    string OpcodeStr, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v64i8_info>, 
+                                    EVEX_V512;
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v32i8x_info>, 
+                                    EVEX_V256;
+    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v16i8x_info>,
+                                    EVEX_V128;
+  }
+}
+
+defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 
+                                       HasBWI>, EVEX_4V;
index b4907074605d47d0ee171d2afefb46e372de8654..c42d789c2031aa1bd26a19b90c164db66e8736dd 100644 (file)
@@ -4137,8 +4137,10 @@ defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
 defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                             VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+} // Predicates = [HasAVX]
 
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] ,
+                                    Predicates = [HasAVX, NoVLX_Or_NoBWI]in {
   // 128-bit logical shifts.
   def VPSLLDQri : PDIi8<0x73, MRM7r,
                     (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
@@ -4153,8 +4155,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
                       (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
                     VEX_4V;
   // PSRADQri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX]
+} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
 
 let Predicates = [HasAVX2, NoVLX] in {
 defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
@@ -4183,8 +4184,10 @@ defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
 defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                              VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
                              SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+}// Predicates = [HasAVX2]
 
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 , 
+                                    Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   // 256-bit logical shifts.
   def VPSLLDQYri : PDIi8<0x73, MRM7r,
                     (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
@@ -4199,8 +4202,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
                       (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
                     VEX_4V, VEX_L;
   // PSRADQYri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX2]
+} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
 
 let Constraints = "$src1 = $dst" in {
 defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
index 4921992671990bc67b435151607d872d4215626b..409ba59b1dc44146ddda55632296fd412b9a0c1d 100644 (file)
@@ -19,7 +19,7 @@ namespace llvm {
 enum IntrinsicType {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
-  INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
+  INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
@@ -1426,6 +1426,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
                      X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
+  X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_pd,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ps,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_sd,   INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
index d551e2331d42977034eafee818bb34a4f1207604..4d8db7df8b07c03c8ab649143a8863ca837856f6 100644 (file)
@@ -261,4 +261,9 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
   ret <2 x double> %bitcast64
-}
\ No newline at end of file
+}
+
+define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
+  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
+  ret <16 x i16> %shuffle
+}
index ab724da69272ee377a9a207f705cef8bca8c126e..6376657cf16703181392ff829d1ddbeb7bb035bb 100644 (file)
@@ -1221,3 +1221,41 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8>
   %res4 = add <32 x i16> %res3, %res2
   ret <32 x i16> %res4
 }
+
+declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512
+; CHECK-NOT: call 
+; CHECK: vpslldq 
+; CHECK: vpslldq 
+define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
+  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
+  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512
+; CHECK-NOT: call 
+; CHECK: vpsrldq 
+; CHECK: vpsrldq
+define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
+  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
+  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+declare  <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512
+; CHECK-NOT: call 
+; CHECK: vpsadbw %zmm1
+; CHECK: vpsadbw %zmm2
+define  <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
+  %res = call  <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
+  %res1 = call  <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
+  %res2 = add  <64 x i8> %res, %res1
+  ret  <64 x i8> %res2
+}
index 3397c770d42075f319f960a53c23fd38d142627e..bb8a1f51cb03158e19bcf7f9fc6cd6db6a47ac83 100644 (file)
@@ -4275,3 +4275,4 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
   %res4 = add <16 x i16> %res3, %res2
   ret <16 x i16> %res4
 }
+
index a13c49aaac6bfc85a4e56b0ffbe9bce76361d37d..c557da13ea8201a48dd64cf62b643345fd34fa88 100644 (file)
 // CHECK:  encoding: [0x62,0xe3,0x5d,0x40,0x42,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
           vdbpsadbw $123, -8256(%rdx), %zmm20, %zmm21
 
+// CHECK: vpslldq $171, %zmm28, %zmm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0xab]
+          vpslldq $171, %zmm28, %zmm20
+
+// CHECK: vpslldq $123, %zmm28, %zmm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0x7b]
+          vpslldq $123, %zmm28, %zmm20
+
+// CHECK: vpslldq $123, (%rcx), %zmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x40,0x73,0x39,0x7b]
+          vpslldq $123, (%rcx), %zmm20
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %zmm20
+// CHECK:  encoding: [0x62,0xb1,0x5d,0x40,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpslldq $123, 291(%rax,%r14,8), %zmm20
+
+// CHECK: vpslldq $123, 8128(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x7f,0x7b]
+          vpslldq $123, 8128(%rdx), %zmm20
+
+// CHECK: vpslldq $123, 8192(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0x00,0x20,0x00,0x00,0x7b]
+          vpslldq $123, 8192(%rdx), %zmm20
+
+// CHECK: vpslldq $123, -8192(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x80,0x7b]
+          vpslldq $123, -8192(%rdx), %zmm20
+
+// CHECK: vpslldq $123, -8256(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+          vpslldq $123, -8256(%rdx), %zmm20
+
+// CHECK: vpsrldq $171, %zmm26, %zmm18
+// CHECK:  encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0xab]
+          vpsrldq $171, %zmm26, %zmm18
+
+// CHECK: vpsrldq $123, %zmm26, %zmm18
+// CHECK:  encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0x7b]
+          vpsrldq $123, %zmm26, %zmm18
+
+// CHECK: vpsrldq $123, (%rcx), %zmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x40,0x73,0x19,0x7b]
+          vpsrldq $123, (%rcx), %zmm18
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %zmm18
+// CHECK:  encoding: [0x62,0xb1,0x6d,0x40,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpsrldq $123, 291(%rax,%r14,8), %zmm18
+
+// CHECK: vpsrldq $123, 8128(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x7f,0x7b]
+          vpsrldq $123, 8128(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, 8192(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vpsrldq $123, 8192(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, -8192(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x80,0x7b]
+          vpsrldq $123, -8192(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, -8256(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vpsrldq $123, -8256(%rdx), %zmm18
+
+// CHECK: vpsadbw %zmm22, %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x21,0x35,0x40,0xf6,0xe6]
+          vpsadbw %zmm22, %zmm25, %zmm28
+
+// CHECK: vpsadbw (%rcx), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xf6,0x21]
+          vpsadbw (%rcx), %zmm25, %zmm28
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x21,0x35,0x40,0xf6,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
+
+// CHECK: vpsadbw 8128(%rdx), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x7f]
+          vpsadbw 8128(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw 8192(%rdx), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0x00,0x20,0x00,0x00]
+          vpsadbw 8192(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw -8192(%rdx), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x80]
+          vpsadbw -8192(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw -8256(%rdx), %zmm25, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0xc0,0xdf,0xff,0xff]
+          vpsadbw -8256(%rdx), %zmm25, %zmm28
index ae46e9424764dc7b6d549a0e6329272d068a23f4..91ae301b9d1545fb572546f9ad15fcd30309d4fd 100644 (file)
 // CHECK:  encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff]
           vpunpckhwd -4128(%rdx), %ymm25, %ymm28
 
+
 // CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19
 // CHECK:  encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0xab]
           vpalignr $171, %xmm21, %xmm26, %xmm19
 // CHECK: vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
 // CHECK:  encoding: [0x62,0xe3,0x65,0x20,0x42,0x8a,0xe0,0xef,0xff,0xff,0x7b]
           vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
+// CHECK: vpslldq $171, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0xab]
+          vpslldq $171, %xmm24, %xmm20
+
+// CHECK: vpslldq $123, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0x7b]
+          vpslldq $123, %xmm24, %xmm20
+
+// CHECK: vpslldq $123, (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x00,0x73,0x39,0x7b]
+          vpslldq $123, (%rcx), %xmm20
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xb1,0x5d,0x00,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpslldq $123, 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpslldq $123, 2032(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x7f,0x7b]
+          vpslldq $123, 2032(%rdx), %xmm20
+
+// CHECK: vpslldq $123, 2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
+          vpslldq $123, 2048(%rdx), %xmm20
+
+// CHECK: vpslldq $123, -2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x80,0x7b]
+          vpslldq $123, -2048(%rdx), %xmm20
+
+// CHECK: vpslldq $123, -2064(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
+          vpslldq $123, -2064(%rdx), %xmm20
+
+// CHECK: vpslldq $171, %ymm25, %ymm26
+// CHECK:  encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0xab]
+          vpslldq $171, %ymm25, %ymm26
+
+// CHECK: vpslldq $123, %ymm25, %ymm26
+// CHECK:  encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0x7b]
+          vpslldq $123, %ymm25, %ymm26
+
+// CHECK: vpslldq $123, (%rcx), %ymm26
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x73,0x39,0x7b]
+          vpslldq $123, (%rcx), %ymm26
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %ymm26
+// CHECK:  encoding: [0x62,0xb1,0x2d,0x20,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpslldq $123, 291(%rax,%r14,8), %ymm26
+
+// CHECK: vpslldq $123, 4064(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x7f,0x7b]
+          vpslldq $123, 4064(%rdx), %ymm26
+
+// CHECK: vpslldq $123, 4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
+          vpslldq $123, 4096(%rdx), %ymm26
+
+// CHECK: vpslldq $123, -4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x80,0x7b]
+          vpslldq $123, -4096(%rdx), %ymm26
+
+// CHECK: vpslldq $123, -4128(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
+          vpslldq $123, -4128(%rdx), %ymm26
+
+// CHECK: vpslldq $171, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0xab]
+          vpslldq $0xab, %xmm19, %xmm23
+
+// CHECK: vpslldq $123, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0x7b]
+          vpslldq $0x7b, %xmm19, %xmm23
+
+// CHECK: vpslldq $123, (%rcx), %xmm23
+// CHECK:  encoding: [0x62,0xf1,0x45,0x00,0x73,0x39,0x7b]
+          vpslldq $0x7b,(%rcx), %xmm23
+
+// CHECK: vpslldq $123, 4660(%rax,%r14,8), %xmm23
+// CHECK:  encoding: [0x62,0xb1,0x45,0x00,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpslldq $0x7b,4660(%rax,%r14,8), %xmm23
+
+// CHECK: vpslldq $123, 2032(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x7f,0x7b]
+          vpslldq $0x7b,2032(%rdx), %xmm23
+
+// CHECK: vpslldq $123, 2048(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
+          vpslldq $0x7b,2048(%rdx), %xmm23
+
+// CHECK: vpslldq $123, -2048(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x80,0x7b]
+          vpslldq $0x7b,-2048(%rdx), %xmm23
+
+// CHECK: vpslldq $123, -2064(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
+          vpslldq $0x7b,-2064(%rdx), %xmm23
+
+// CHECK: vpslldq $171, %ymm25, %ymm29
+// CHECK:  encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0xab]
+          vpslldq $0xab, %ymm25, %ymm29
+
+// CHECK: vpslldq $123, %ymm25, %ymm29
+// CHECK:  encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0x7b]
+          vpslldq $0x7b, %ymm25, %ymm29
+
+// CHECK: vpslldq $123, (%rcx), %ymm29
+// CHECK:  encoding: [0x62,0xf1,0x15,0x20,0x73,0x39,0x7b]
+          vpslldq $0x7b,(%rcx), %ymm29
+
+// CHECK: vpslldq $123, 4660(%rax,%r14,8), %ymm29
+// CHECK:  encoding: [0x62,0xb1,0x15,0x20,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpslldq $0x7b,4660(%rax,%r14,8), %ymm29
+
+// CHECK: vpslldq $123, 4064(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x7f,0x7b]
+          vpslldq $0x7b,4064(%rdx), %ymm29
+
+// CHECK: vpslldq $123, 4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
+          vpslldq $0x7b,4096(%rdx), %ymm29
+
+// CHECK: vpslldq $123, -4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x80,0x7b]
+          vpslldq $0x7b,-4096(%rdx), %ymm29
+
+// CHECK: vpslldq $123, -4128(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
+          vpslldq $0x7b,-4128(%rdx), %ymm29
+
+// CHECK: vpsrldq $171, %xmm21, %xmm24
+// CHECK:  encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0xab]
+          vpsrldq $171, %xmm21, %xmm24
+
+// CHECK: vpsrldq $123, %xmm21, %xmm24
+// CHECK:  encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0x7b]
+          vpsrldq $123, %xmm21, %xmm24
+
+// CHECK: vpsrldq $123, (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x00,0x73,0x19,0x7b]
+          vpsrldq $123, (%rcx), %xmm24
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0xb1,0x3d,0x00,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpsrldq $123, 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpsrldq $123, 2032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x7f,0x7b]
+          vpsrldq $123, 2032(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, 2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpsrldq $123, 2048(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, -2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x80,0x7b]
+          vpsrldq $123, -2048(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, -2064(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpsrldq $123, -2064(%rdx), %xmm24
+
+// CHECK: vpsrldq $171, %ymm25, %ymm24
+// CHECK:  encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0xab]
+          vpsrldq $171, %ymm25, %ymm24
+
+// CHECK: vpsrldq $123, %ymm25, %ymm24
+// CHECK:  encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0x7b]
+          vpsrldq $123, %ymm25, %ymm24
+
+// CHECK: vpsrldq $123, (%rcx), %ymm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x20,0x73,0x19,0x7b]
+          vpsrldq $123, (%rcx), %ymm24
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %ymm24
+// CHECK:  encoding: [0x62,0xb1,0x3d,0x20,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpsrldq $123, 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpsrldq $123, 4064(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x7f,0x7b]
+          vpsrldq $123, 4064(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, 4096(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpsrldq $123, 4096(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, -4096(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x80,0x7b]
+          vpsrldq $123, -4096(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, -4128(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpsrldq $123, -4128(%rdx), %ymm24
+
+// CHECK: vpsrldq $171, %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0xab]
+          vpsrldq $0xab, %xmm17, %xmm18
+
+// CHECK: vpsrldq $123, %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0x7b]
+          vpsrldq $0x7b, %xmm17, %xmm18
+
+// CHECK: vpsrldq $123, (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x00,0x73,0x19,0x7b]
+          vpsrldq $0x7b,(%rcx), %xmm18
+
+// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xb1,0x6d,0x00,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpsrldq $0x7b,4660(%rax,%r14,8), %xmm18
+
+// CHECK: vpsrldq $123, 2032(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x7f,0x7b]
+          vpsrldq $0x7b,2032(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, 2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpsrldq $0x7b,2048(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, -2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x80,0x7b]
+          vpsrldq $0x7b,-2048(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, -2064(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpsrldq $0x7b,-2064(%rdx), %xmm18
+
+// CHECK: vpsrldq $171, %ymm28, %ymm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0xab]
+          vpsrldq $0xab, %ymm28, %ymm20
+
+// CHECK: vpsrldq $123, %ymm28, %ymm20
+// CHECK:  encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0x7b]
+          vpsrldq $0x7b, %ymm28, %ymm20
+
+// CHECK: vpsrldq $123, (%rcx), %ymm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x73,0x19,0x7b]
+          vpsrldq $0x7b,(%rcx), %ymm20
+
+// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %ymm20
+// CHECK:  encoding: [0x62,0xb1,0x5d,0x20,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpsrldq $0x7b,4660(%rax,%r14,8), %ymm20
+
+// CHECK: vpsrldq $123, 4064(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x7f,0x7b]
+          vpsrldq $0x7b,4064(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, 4096(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpsrldq $0x7b,4096(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, -4096(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x80,0x7b]
+          vpsrldq $0x7b,-4096(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, -4128(%rdx), %ymm20
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpsrldq $0x7b,-4128(%rdx), %ymm20
+
+// CHECK: vpsadbw %xmm24, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0x81,0x3d,0x00,0xf6,0xc8]
+          vpsadbw %xmm24, %xmm24, %xmm17
+
+// CHECK: vpsadbw (%rcx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x09]
+          vpsadbw (%rcx), %xmm24, %xmm17
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x00,0xf6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
+
+// CHECK: vpsadbw 2032(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x7f]
+          vpsadbw 2032(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw 2048(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0x00,0x08,0x00,0x00]
+          vpsadbw 2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw -2048(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x80]
+          vpsadbw -2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw -2064(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0xf0,0xf7,0xff,0xff]
+          vpsadbw -2064(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw %ymm24, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0x81,0x25,0x20,0xf6,0xd8]
+          vpsadbw %ymm24, %ymm27, %ymm19
+
+// CHECK: vpsadbw (%rcx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xf6,0x19]
+          vpsadbw (%rcx), %ymm27, %ymm19
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x25,0x20,0xf6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
+
+// CHECK: vpsadbw 4064(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x7f]
+          vpsadbw 4064(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw 4096(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0x00,0x10,0x00,0x00]
+          vpsadbw 4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw -4096(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x80]
+          vpsadbw -4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw -4128(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0xe0,0xef,0xff,0xff]
+          vpsadbw -4128(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw %xmm21, %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x21,0x65,0x00,0xf6,0xf5]
+          vpsadbw %xmm21, %xmm19, %xmm30
+
+// CHECK: vpsadbw (%rcx), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xf6,0x31]
+          vpsadbw (%rcx), %xmm19, %xmm30
+
+// CHECK: vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x21,0x65,0x00,0xf6,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
+
+// CHECK: vpsadbw 2032(%rdx), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x7f]
+          vpsadbw 2032(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw 2048(%rdx), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0x00,0x08,0x00,0x00]
+          vpsadbw 2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw -2048(%rdx), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x80]
+          vpsadbw -2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw -2064(%rdx), %xmm19, %xmm30
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0xf0,0xf7,0xff,0xff]
+          vpsadbw -2064(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw %ymm27, %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0x81,0x2d,0x20,0xf6,0xe3]
+          vpsadbw %ymm27, %ymm26, %ymm20
+
+// CHECK: vpsadbw (%rcx), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x21]
+          vpsadbw (%rcx), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xa1,0x2d,0x20,0xf6,0xa4,0xf0,0x34,0x12,0x00,0x00]
+          vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4064(%rdx), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x7f]
+          vpsadbw 4064(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4096(%rdx), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0x00,0x10,0x00,0x00]
+          vpsadbw 4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw -4096(%rdx), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x80]
+          vpsadbw -4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff]
+          vpsadbw -4128(%rdx), %ymm26, %ymm20