AVX-512: All forms of VCOPMRESS VEXPAND instructions,
authorElena Demikhovsky <elena.demikhovsky@intel.com>
Mon, 22 Jun 2015 11:16:30 +0000 (11:16 +0000)
committerElena Demikhovsky <elena.demikhovsky@intel.com>
Mon, 22 Jun 2015 11:16:30 +0000 (11:16 +0000)
encoding tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240272 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
test/MC/X86/avx512-encodings.s
test/MC/X86/avx512vl-encoding.s

index 67e733384ab6901f3392b2d61186951a763ee24f..00c04c1ba4c6d930ee82e92fa897ed39b2e5f44f 100644 (file)
@@ -15215,18 +15215,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
       SDValue PassThru = Op.getOperand(2);
       if (isAllOnes(Mask)) // return data as is
         return Op.getOperand(1);
-      EVT VT = Op.getValueType();
-      EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                    VT.getVectorNumElements());
-      EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                       Mask.getValueType().getSizeInBits());
-      SDLoc dl(Op);
-      SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                  DAG.getBitcast(BitcastVT, Mask),
-                                  DAG.getIntPtrConstant(0, dl));
 
-      return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
-                         PassThru);
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+                                              DataToCompress),
+                                  Mask, PassThru, Subtarget, DAG);
     }
     case BLEND: {
       SDValue Mask = Op.getOperand(3);
@@ -15769,16 +15761,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                           MachinePointerInfo(), false, false,
                           VT.getScalarSizeInBits()/8);
 
-    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                  VT.getVectorNumElements());
-    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     Mask.getValueType().getSizeInBits());
-    SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                DAG.getBitcast(BitcastVT, Mask),
-                                DAG.getIntPtrConstant(0, dl));
-
-    SDValue Compressed =  DAG.getNode(IntrData->Opc0, dl, VT, VMask,
-                                      DataToCompress, DAG.getUNDEF(VT));
+    SDValue Compressed =
+      getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
+                           Mask, DAG.getUNDEF(VT), Subtarget, DAG);
     return DAG.getStore(Chain, dl, Compressed, Addr,
                         MachinePointerInfo(), false, false,
                         VT.getScalarSizeInBits()/8);
@@ -15786,7 +15771,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
   case EXPAND_FROM_MEM: {
     SDLoc dl(Op);
     SDValue Mask = Op.getOperand(4);
-    SDValue PathThru = Op.getOperand(3);
+    SDValue PassThru = Op.getOperand(3);
     SDValue Addr = Op.getOperand(2);
     SDValue Chain = Op.getOperand(0);
     EVT VT = Op.getValueType();
@@ -15794,21 +15779,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
     if (isAllOnes(Mask)) // return just a load
       return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
                          false, VT.getScalarSizeInBits()/8);
-    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                  VT.getVectorNumElements());
-    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     Mask.getValueType().getSizeInBits());
-    SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                DAG.getBitcast(BitcastVT, Mask),
-                                DAG.getIntPtrConstant(0, dl));
 
     SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
                                        false, false, false,
                                        VT.getScalarSizeInBits()/8);
 
     SDValue Results[] = {
-        DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
-        Chain};
+      getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
+                           Mask, PassThru, Subtarget, DAG), Chain};
     return DAG.getMergeValues(Results, dl);
   }
   }
index de6a83506b2868cbeea127d4acdc17bcd627d6f4..c60b11cbe3162cc5a4bdb32c3d1c3c857e76ebeb 100644 (file)
@@ -5868,26 +5868,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
 //===----------------------------------------------------------------------===//
 // AVX-512 - COMPRESS and EXPAND
 //
+
 multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr> {
-  def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.RC:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
-                                      _.ImmAllZerosV)))]>, EVEX_KZ;
-
-  let Constraints = "$src0 = $dst" in
-  def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
-                    (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
-                    OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-                    [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
-                                            _.RC:$src0)))]>, EVEX_K;
+  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
+              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 
+              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
 
   let mayStore = 1 in {
+  def mr : AVX5128I<opc, MRMDestMem, (outs),
+              (ins _.MemOp:$dst, _.RC:$src),
+              OpcodeStr # "\t{$src, $dst |$dst, $src}",
+              []>, EVEX_CD8<_.EltSize, CD8VT1>;
+
   def mrk : AVX5128I<opc, MRMDestMem, (outs),
               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-              [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
+              [(store (_.VT (vselect _.KRCWM:$mask, 
+                             (_.VT (X86compress  _.RC:$src)), _.ImmAllZerosV)),
                 addr:$dst)]>,
               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
   }
@@ -5915,37 +5913,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
 // expand
 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr> {
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.RC:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
-                                      _.ImmAllZerosV)))]>, EVEX_KZ;
-
-  let Constraints = "$src0 = $dst" in
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
-                    (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
-                    OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-                    [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
-
-  let mayLoad = 1, Constraints = "$src0 = $dst" in
-  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
-              (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT (bitconvert
-                                              (_.LdFrag addr:$src))),
-                                      _.RC:$src0)))]>,
-              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 
+              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
 
   let mayLoad = 1 in
-  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.MemOp:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT (bitconvert (_.LdFrag addr:$src))),
-                                     _.ImmAllZerosV)))]>,
-              EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
+  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
+              (_.VT (X86expand (_.VT (bitconvert
+                                      (_.LdFrag addr:$src1)))))>,
+            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
 }
 
 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
index 16ae77dd81a349ca67db19f3d032ee61965d4ade..de3b3b6516a402e73f5143bad16d920b8d0a3e6c 100644 (file)
@@ -347,12 +347,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
 def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
 def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
 
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
-                              [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
-                               SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
-def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
-                              [SDTCisSameAs<0, 3>,
-                               SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
 
 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
                                SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
index e52dfac1976c90734199e6b2d1ae9ee38f7f9e6d..05a7b1b82b6d83e956381063009b636951e7bb8d 100644 (file)
@@ -9514,3 +9514,170 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff]
           vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20
 
+// CHECK: vcompresspd %zmm9, (%rcx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x09]
+          vcompresspd %zmm9, (%rcx)
+
+// CHECK: vcompresspd %zmm9, (%rcx) {%k4}
+// CHECK:  encoding: [0x62,0x72,0xfd,0x4c,0x8a,0x09]
+          vcompresspd %zmm9, (%rcx) {%k4}
+
+// CHECK: vcompresspd %zmm9, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x32,0xfd,0x48,0x8a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %zmm9, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %zmm9, 1016(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x7f]
+          vcompresspd %zmm9, 1016(%rdx)
+
+// CHECK: vcompresspd %zmm9, 1024(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0x00,0x04,0x00,0x00]
+          vcompresspd %zmm9, 1024(%rdx)
+
+// CHECK: vcompresspd %zmm9, -1024(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x80]
+          vcompresspd %zmm9, -1024(%rdx)
+
+// CHECK: vcompresspd %zmm9, -1032(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0xf8,0xfb,0xff,0xff]
+          vcompresspd %zmm9, -1032(%rdx)
+
+// CHECK: vcompresspd %zmm4, %zmm8
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x48,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8
+
+// CHECK: vcompresspd %zmm4, %zmm8 {%k6}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x4e,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8 {%k6}
+
+// CHECK: vcompresspd %zmm4, %zmm8 {%k6} {z}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0xce,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8 {%k6} {z}
+
+// CHECK: vcompressps %zmm10, (%rcx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x11]
+          vcompressps %zmm10, (%rcx)
+
+// CHECK: vcompressps %zmm10, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4f,0x8a,0x11]
+          vcompressps %zmm10, (%rcx) {%k7}
+
+// CHECK: vcompressps %zmm10, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x8a,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %zmm10, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %zmm10, 508(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x7f]
+          vcompressps %zmm10, 508(%rdx)
+
+// CHECK: vcompressps %zmm10, 512(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0x00,0x02,0x00,0x00]
+          vcompressps %zmm10, 512(%rdx)
+
+// CHECK: vcompressps %zmm10, -512(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x80]
+          vcompressps %zmm10, -512(%rdx)
+
+// CHECK: vcompressps %zmm10, -516(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0xfc,0xfd,0xff,0xff]
+          vcompressps %zmm10, -516(%rdx)
+
+// CHECK: vcompressps %zmm14, %zmm4
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4
+
+// CHECK: vcompressps %zmm14, %zmm4 {%k2}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4a,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4 {%k2}
+
+// CHECK: vcompressps %zmm14, %zmm4 {%k2} {z}
+// CHECK:  encoding: [0x62,0x72,0x7d,0xca,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4 {%k2} {z}
+
+// CHECK: vexpandpd (%rcx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x01]
+          vexpandpd (%rcx), %zmm24
+
+// CHECK: vexpandpd (%rcx), %zmm24 {%k4}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x4c,0x88,0x01]
+          vexpandpd (%rcx), %zmm24 {%k4}
+
+// CHECK: vexpandpd (%rcx), %zmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0xcc,0x88,0x01]
+          vexpandpd (%rcx), %zmm24 {%k4} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %zmm24
+// CHECK:  encoding: [0x62,0x22,0xfd,0x48,0x88,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %zmm24
+
+// CHECK: vexpandpd 1016(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x7f]
+          vexpandpd 1016(%rdx), %zmm24
+
+// CHECK: vexpandpd 1024(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %zmm24
+
+// CHECK: vexpandpd -1024(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x80]
+          vexpandpd -1024(%rdx), %zmm24
+
+// CHECK: vexpandpd -1032(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %zmm24
+
+// CHECK: vexpandpd %zmm15, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x48,0x88,0xff]
+          vexpandpd %zmm15, %zmm23
+
+// CHECK: vexpandpd %zmm15, %zmm23 {%k5}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x4d,0x88,0xff]
+          vexpandpd %zmm15, %zmm23 {%k5}
+
+// CHECK: vexpandpd %zmm15, %zmm23 {%k5} {z}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0xcd,0x88,0xff]
+          vexpandpd %zmm15, %zmm23 {%k5} {z}
+
+// CHECK: vexpandps (%rcx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x21]
+          vexpandps (%rcx), %zmm4
+
+// CHECK: vexpandps (%rcx), %zmm4 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x4e,0x88,0x21]
+          vexpandps (%rcx), %zmm4 {%k6}
+
+// CHECK: vexpandps (%rcx), %zmm4 {%k6} {z}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0xce,0x88,0x21]
+          vexpandps (%rcx), %zmm4 {%k6} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %zmm4
+// CHECK:  encoding: [0x62,0xb2,0x7d,0x48,0x88,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %zmm4
+
+// CHECK: vexpandps 508(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x7f]
+          vexpandps 508(%rdx), %zmm4
+
+// CHECK: vexpandps 512(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %zmm4
+
+// CHECK: vexpandps -512(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x80]
+          vexpandps -512(%rdx), %zmm4
+
+// CHECK: vexpandps -516(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %zmm4
+
+// CHECK: vexpandps %zmm9, %zmm14
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x88,0xf1]
+          vexpandps %zmm9, %zmm14
+
+// CHECK: vexpandps %zmm9, %zmm14 {%k2}
+// CHECK:  encoding: [0x62,0x52,0x7d,0x4a,0x88,0xf1]
+          vexpandps %zmm9, %zmm14 {%k2}
+
+// CHECK: vexpandps %zmm9, %zmm14 {%k2} {z}
+// CHECK:  encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1]
+          vexpandps %zmm9, %zmm14 {%k2} {z}
index dd1ac24b04eaa7755b0e1192f0f4e136271c7b88..deae35f12a31ffb0b669fdcd0851021f02286281 100644 (file)
 // CHECK: vpmovm2q %k2, %ymm30
 // CHECK:  encoding: [0x62,0x62,0xfe,0x28,0x38,0xf2]
           vpmovm2q %k2, %ymm30
+
+// CHECK: vcompresspd %xmm23, (%rcx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x39]
+          vcompresspd %xmm23, (%rcx)
+
+// CHECK: vcompresspd %xmm23, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0e,0x8a,0x39]
+          vcompresspd %xmm23, (%rcx) {%k6}
+
+// CHECK: vcompresspd %xmm23, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x8a,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %xmm23, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %xmm23, 1016(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x7f]
+          vcompresspd %xmm23, 1016(%rdx)
+
+// CHECK: vcompresspd %xmm23, 1024(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0x00,0x04,0x00,0x00]
+          vcompresspd %xmm23, 1024(%rdx)
+
+// CHECK: vcompresspd %xmm23, -1024(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x80]
+          vcompresspd %xmm23, -1024(%rdx)
+
+// CHECK: vcompresspd %xmm23, -1032(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0xf8,0xfb,0xff,0xff]
+          vcompresspd %xmm23, -1032(%rdx)
+
+// CHECK: vcompresspd %ymm29, (%rcx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x29]
+          vcompresspd %ymm29, (%rcx)
+
+// CHECK: vcompresspd %ymm29, (%rcx) {%k2}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x2a,0x8a,0x29]
+          vcompresspd %ymm29, (%rcx) {%k2}
+
+// CHECK: vcompresspd %ymm29, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x22,0xfd,0x28,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %ymm29, 1016(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x7f]
+          vcompresspd %ymm29, 1016(%rdx)
+
+// CHECK: vcompresspd %ymm29, 1024(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0x00,0x04,0x00,0x00]
+          vcompresspd %ymm29, 1024(%rdx)
+
+// CHECK: vcompresspd %ymm29, -1024(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x80]
+          vcompresspd %ymm29, -1024(%rdx)
+
+// CHECK: vcompresspd %ymm29, -1032(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0xf8,0xfb,0xff,0xff]
+          vcompresspd %ymm29, -1032(%rdx)
+
+// CHECK: vcompresspd %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0x22,0xfd,0x08,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20
+
+// CHECK: vcompresspd %xmm27, %xmm20 {%k2}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x0a,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20 {%k2}
+
+// CHECK: vcompresspd %xmm27, %xmm20 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x8a,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20 {%k2} {z}
+
+// CHECK: vcompresspd %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24
+
+// CHECK: vcompresspd %ymm20, %ymm24 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2b,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24 {%k3}
+
+// CHECK: vcompresspd %ymm20, %ymm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xab,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24 {%k3} {z}
+
+// CHECK: vcompressps %xmm21, (%rcx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x29]
+          vcompressps %xmm21, (%rcx)
+
+// CHECK: vcompressps %xmm21, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0f,0x8a,0x29]
+          vcompressps %xmm21, (%rcx) {%k7}
+
+// CHECK: vcompressps %xmm21, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %xmm21, 508(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x7f]
+          vcompressps %xmm21, 508(%rdx)
+
+// CHECK: vcompressps %xmm21, 512(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0x00,0x02,0x00,0x00]
+          vcompressps %xmm21, 512(%rdx)
+
+// CHECK: vcompressps %xmm21, -512(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x80]
+          vcompressps %xmm21, -512(%rdx)
+
+// CHECK: vcompressps %xmm21, -516(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0xfc,0xfd,0xff,0xff]
+          vcompressps %xmm21, -516(%rdx)
+
+// CHECK: vcompressps %ymm24, (%rcx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x01]
+          vcompressps %ymm24, (%rcx)
+
+// CHECK: vcompressps %ymm24, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2f,0x8a,0x01]
+          vcompressps %ymm24, (%rcx) {%k7}
+
+// CHECK: vcompressps %ymm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x8a,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %ymm24, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %ymm24, 508(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x7f]
+          vcompressps %ymm24, 508(%rdx)
+
+// CHECK: vcompressps %ymm24, 512(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0x00,0x02,0x00,0x00]
+          vcompressps %ymm24, 512(%rdx)
+
+// CHECK: vcompressps %ymm24, -512(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x80]
+          vcompressps %ymm24, -512(%rdx)
+
+// CHECK: vcompressps %ymm24, -516(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0xfc,0xfd,0xff,0xff]
+          vcompressps %ymm24, -516(%rdx)
+
+// CHECK: vcompressps %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x8a,0xec]
+          vcompressps %xmm29, %xmm28
+
+// CHECK: vcompressps %xmm29, %xmm28 {%k3}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0b,0x8a,0xec]
+          vcompressps %xmm29, %xmm28 {%k3}
+
+// CHECK: vcompressps %xmm29, %xmm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8b,0x8a,0xec]
+          vcompressps %xmm29, %xmm28 {%k3} {z}
+
+// CHECK: vcompressps %ymm25, %ymm23
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23
+
+// CHECK: vcompressps %ymm25, %ymm23 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2e,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23 {%k6}
+
+// CHECK: vcompressps %ymm25, %ymm23 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xae,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23 {%k6} {z}
+
+// CHECK: vexpandpd (%rcx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x39]
+          vexpandpd (%rcx), %xmm23
+
+// CHECK: vexpandpd (%rcx), %xmm23 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0b,0x88,0x39]
+          vexpandpd (%rcx), %xmm23 {%k3}
+
+// CHECK: vexpandpd (%rcx), %xmm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x8b,0x88,0x39]
+          vexpandpd (%rcx), %xmm23 {%k3} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %xmm23
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %xmm23
+
+// CHECK: vexpandpd 1016(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x7f]
+          vexpandpd 1016(%rdx), %xmm23
+
+// CHECK: vexpandpd 1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %xmm23
+
+// CHECK: vexpandpd -1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x80]
+          vexpandpd -1024(%rdx), %xmm23
+
+// CHECK: vexpandpd -1032(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %xmm23
+
+// CHECK: vexpandpd (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x31]
+          vexpandpd (%rcx), %ymm22
+
+// CHECK: vexpandpd (%rcx), %ymm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x88,0x31]
+          vexpandpd (%rcx), %ymm22 {%k5}
+
+// CHECK: vexpandpd (%rcx), %ymm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x88,0x31]
+          vexpandpd (%rcx), %ymm22 {%k5} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x88,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %ymm22
+
+// CHECK: vexpandpd 1016(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x7f]
+          vexpandpd 1016(%rdx), %ymm22
+
+// CHECK: vexpandpd 1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %ymm22
+
+// CHECK: vexpandpd -1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x80]
+          vexpandpd -1024(%rdx), %ymm22
+
+// CHECK: vexpandpd -1032(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %ymm22
+
+// CHECK: vexpandpd %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xfd,0x08,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29
+
+// CHECK: vexpandpd %xmm25, %xmm29 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x0f,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29 {%k7}
+
+// CHECK: vexpandpd %xmm25, %xmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x8f,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29 {%k7} {z}
+
+// CHECK: vexpandpd %ymm27, %ymm21
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21
+
+// CHECK: vexpandpd %ymm27, %ymm21 {%k2}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2a,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21 {%k2}
+
+// CHECK: vexpandpd %ymm27, %ymm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xaa,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21 {%k2} {z}
+
+// CHECK: vexpandps (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x11]
+          vexpandps (%rcx), %xmm18
+
+// CHECK: vexpandps (%rcx), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x09,0x88,0x11]
+          vexpandps (%rcx), %xmm18 {%k1}
+
+// CHECK: vexpandps (%rcx), %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x89,0x88,0x11]
+          vexpandps (%rcx), %xmm18 {%k1} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x88,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %xmm18
+
+// CHECK: vexpandps 508(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x7f]
+          vexpandps 508(%rdx), %xmm18
+
+// CHECK: vexpandps 512(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %xmm18
+
+// CHECK: vexpandps -512(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x80]
+          vexpandps -512(%rdx), %xmm18
+
+// CHECK: vexpandps -516(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %xmm18
+
+// CHECK: vexpandps (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x39]
+          vexpandps (%rcx), %ymm23
+
+// CHECK: vexpandps (%rcx), %ymm23 {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x2f,0x88,0x39]
+          vexpandps (%rcx), %ymm23 {%k7}
+
+// CHECK: vexpandps (%rcx), %ymm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xaf,0x88,0x39]
+          vexpandps (%rcx), %ymm23 {%k7} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %ymm23
+
+// CHECK: vexpandps 508(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x7f]
+          vexpandps 508(%rdx), %ymm23
+
+// CHECK: vexpandps 512(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %ymm23
+
+// CHECK: vexpandps -512(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x80]
+          vexpandps -512(%rdx), %ymm23
+
+// CHECK: vexpandps -516(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %ymm23
+
+// CHECK: vexpandps %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x88,0xeb]
+          vexpandps %xmm19, %xmm29
+
+// CHECK: vexpandps %xmm19, %xmm29 {%k5}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0d,0x88,0xeb]
+          vexpandps %xmm19, %xmm29 {%k5}
+
+// CHECK: vexpandps %xmm19, %xmm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8d,0x88,0xeb]
+          vexpandps %xmm19, %xmm29 {%k5} {z}
+
+// CHECK: vexpandps %ymm29, %ymm29
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x88,0xed]
+          vexpandps %ymm29, %ymm29
+
+// CHECK: vexpandps %ymm29, %ymm29 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2d,0x88,0xed]
+          vexpandps %ymm29, %ymm29 {%k5}
+
+// CHECK: vexpandps %ymm29, %ymm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xad,0x88,0xed]
+          vexpandps %ymm29, %ymm29 {%k5} {z}