From: Bruno Cardoso Lopes Date: Wed, 30 Jun 2010 01:58:37 +0000 (+0000) Subject: - Add AVX form of all SSE2 logical instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5a3a4767502341f304a8015580ab05ed74161ab0;p=oota-llvm.git - Add AVX form of all SSE2 logical instructions - Add VEX encoding bits to x86 MRM0r-MRM7r git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107238 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 038d4450407..0b3621469dc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2440,6 +2440,68 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw>; // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in { +defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, + VEX_4V; +defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>, + VEX_4V; +defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>, + VEX_4V; + +defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>, + VEX_4V; +defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>, + VEX_4V; +defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>, + VEX_4V; + +defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>, + VEX_4V; +defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>, + VEX_4V; + +let isCommutable = 1 in { +defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 0>, VEX_4V; +defm VPOR : 
PDI_binop_rm_v2i64<0xEB, "vpor" , or, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 0>, VEX_4V;
+}
+
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def VPSLLDQri : PDIi8<0x73, MRM7r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    def VPSRLDQri : PDIi8<0x73, MRM3r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def VPANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              VR128:$src2)))]>, VEX_4V;
+
+  def VPANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              (memopv2i64 addr:$src2))))]>,
+                                              VEX_4V;
+}
+}
+
 let Constraints = "$src1 = $dst" in {
 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 5dd668400be..80f56e1c22b 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -60,6 +60,27 @@ public:
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86RegisterInfo::getX86RegNum(MO.getReg());
   }
+
+  // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+  // 0-7 and the difference between the 2 groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, example:
+  //
+  //    ModRM field => XMM9 => 1
+  //    VEX.VVVV    => XMM9 => ~9
+  //
+  // See table 4-35 of Intel AVX Programming Reference for details. 
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI, + unsigned OpNum) { + unsigned SrcReg = MI.getOperand(OpNum).getReg(); + unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); + if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) + SrcRegNum += 8; + + // The registers represented through VEX_VVVV should + // be encoded in 1's complement form. + return (~SrcRegNum) & 0xf; + } void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { OS << (char)C; @@ -134,7 +155,6 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, return new X86MCCodeEmitter(TM, Ctx, true); } - /// isDisp8 - Return true if this signed displacement fits in a 8-bit /// sign-extended field. static bool isDisp8(int Value) { @@ -469,29 +489,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; - // If the memory destination has been checked first, - // go back to the first operand + // CurOp and NumOps are equal when VEX_R represents a register used + // to index a memory destination (which is the last operand) CurOp = (CurOp == NumOps) ? 0 : CurOp+1; - // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the - // range 0-7 and the difference between the 2 groups is given by the - // REX prefix. In the VEX prefix, registers are seen sequencially - // from 0-15 and encoded in 1's complement form, example: - // - // ModRM field => XMM9 => 1 - // VEX.VVVV => XMM9 => ~9 - // - // See table 4-35 of Intel AVX Programming Reference for details. if (HasVEX_4V) { - unsigned SrcReg = MI.getOperand(CurOp).getReg(); - unsigned SrcRegNum = GetX86RegNum(MI.getOperand(1)); - if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) - SrcRegNum += 8; - - // The registers represented through VEX_VVVV should - // be encoded in 1's complement form. 
- VEX_4V = (~SrcRegNum) & 0xf; - + VEX_4V = getVEXRegisterEncoding(MI, CurOp); CurOp++; } @@ -505,7 +508,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; } break; - default: + default: // MRM0r-MRM7r + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + + CurOp++; + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_B = 0x0; + } + break; assert(0 && "Not implemented!"); } @@ -831,6 +844,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + CurOp++; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 5b677e9a872..398430e5bb0 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -11182,3 +11182,143 @@ // CHECK: encoding: [0xc5,0xe9,0xf6,0x18] vpsadbw (%eax), %xmm2, %xmm3 +// CHECK: vpsllw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9] + vpsllw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf1,0x18] + vpsllw (%eax), %xmm2, %xmm3 + +// CHECK: vpslld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9] + vpslld %xmm1, %xmm2, %xmm3 + +// CHECK: vpslld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf2,0x18] + vpslld (%eax), %xmm2, %xmm3 + +// CHECK: vpsllq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9] + vpsllq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsllq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xf3,0x18] + vpsllq (%eax), %xmm2, %xmm3 + +// CHECK: vpsraw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xe9,0xe1,0xd9] + vpsraw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsraw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe1,0x18] + vpsraw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrad %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9] + vpsrad %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrad (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xe2,0x18] + vpsrad (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlw %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9] + vpsrlw %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlw (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd1,0x18] + vpsrlw (%eax), %xmm2, %xmm3 + +// CHECK: vpsrld %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9] + vpsrld %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrld (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd2,0x18] + vpsrld (%eax), %xmm2, %xmm3 + +// CHECK: vpsrlq %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9] + vpsrlq %xmm1, %xmm2, %xmm3 + +// CHECK: vpsrlq (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xd3,0x18] + vpsrlq (%eax), %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpslldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a] + vpslldq $10, %xmm2, %xmm3 + +// CHECK: vpsllq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a] + vpsllq $10, %xmm2, %xmm3 + +// CHECK: vpsllw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a] + vpsllw $10, %xmm2, %xmm3 + +// CHECK: vpsrad $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a] + vpsrad $10, %xmm2, %xmm3 + +// CHECK: vpsraw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a] + vpsraw $10, %xmm2, %xmm3 + +// CHECK: vpsrld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a] + vpsrld $10, %xmm2, %xmm3 + +// CHECK: vpsrldq $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a] + vpsrldq $10, %xmm2, %xmm3 + +// CHECK: vpsrlq 
$10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a] + vpsrlq $10, %xmm2, %xmm3 + +// CHECK: vpsrlw $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a] + vpsrlw $10, %xmm2, %xmm3 + +// CHECK: vpslld $10, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a] + vpslld $10, %xmm2, %xmm3 + +// CHECK: vpand %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9] + vpand %xmm1, %xmm2, %xmm3 + +// CHECK: vpand (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdb,0x18] + vpand (%eax), %xmm2, %xmm3 + +// CHECK: vpor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9] + vpor %xmm1, %xmm2, %xmm3 + +// CHECK: vpor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xeb,0x18] + vpor (%eax), %xmm2, %xmm3 + +// CHECK: vpxor %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0xd9] + vpxor %xmm1, %xmm2, %xmm3 + +// CHECK: vpxor (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xef,0x18] + vpxor (%eax), %xmm2, %xmm3 + +// CHECK: vpandn %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9] + vpandn %xmm1, %xmm2, %xmm3 + +// CHECK: vpandn (%eax), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xe9,0xdf,0x18] + vpandn (%eax), %xmm2, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index eaaecd4f7be..275e1c7b8b2 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -1222,3 +1222,143 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x19,0xf6,0x28] vpsadbw (%rax), %xmm12, %xmm13 +// CHECK: vpsllw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb] + vpsllw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf1,0x28] + vpsllw (%rax), %xmm12, %xmm13 + +// CHECK: vpslld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb] + vpslld %xmm11, %xmm12, %xmm13 + +// CHECK: vpslld (%rax), %xmm12, %xmm13 +// CHECK: encoding: 
[0xc5,0x19,0xf2,0x28] + vpslld (%rax), %xmm12, %xmm13 + +// CHECK: vpsllq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb] + vpsllq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsllq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xf3,0x28] + vpsllq (%rax), %xmm12, %xmm13 + +// CHECK: vpsraw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb] + vpsraw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsraw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe1,0x28] + vpsraw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrad %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb] + vpsrad %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrad (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xe2,0x28] + vpsrad (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlw %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb] + vpsrlw %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlw (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd1,0x28] + vpsrlw (%rax), %xmm12, %xmm13 + +// CHECK: vpsrld %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb] + vpsrld %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrld (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd2,0x28] + vpsrld (%rax), %xmm12, %xmm13 + +// CHECK: vpsrlq %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb] + vpsrlq %xmm11, %xmm12, %xmm13 + +// CHECK: vpsrlq (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xd3,0x28] + vpsrlq (%rax), %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpslldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a] + vpslldq $10, %xmm12, %xmm13 + +// CHECK: vpsllq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a] + vpsllq $10, %xmm12, %xmm13 + +// CHECK: vpsllw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a] + vpsllw $10, %xmm12, %xmm13 + +// 
CHECK: vpsrad $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a] + vpsrad $10, %xmm12, %xmm13 + +// CHECK: vpsraw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a] + vpsraw $10, %xmm12, %xmm13 + +// CHECK: vpsrld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a] + vpsrld $10, %xmm12, %xmm13 + +// CHECK: vpsrldq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a] + vpsrldq $10, %xmm12, %xmm13 + +// CHECK: vpsrlq $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a] + vpsrlq $10, %xmm12, %xmm13 + +// CHECK: vpsrlw $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a] + vpsrlw $10, %xmm12, %xmm13 + +// CHECK: vpslld $10, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a] + vpslld $10, %xmm12, %xmm13 + +// CHECK: vpand %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb] + vpand %xmm11, %xmm12, %xmm13 + +// CHECK: vpand (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdb,0x28] + vpand (%rax), %xmm12, %xmm13 + +// CHECK: vpor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb] + vpor %xmm11, %xmm12, %xmm13 + +// CHECK: vpor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xeb,0x28] + vpor (%rax), %xmm12, %xmm13 + +// CHECK: vpxor %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb] + vpxor %xmm11, %xmm12, %xmm13 + +// CHECK: vpxor (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xef,0x28] + vpxor (%rax), %xmm12, %xmm13 + +// CHECK: vpandn %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb] + vpandn %xmm11, %xmm12, %xmm13 + +// CHECK: vpandn (%rax), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x19,0xdf,0x28] + vpandn (%rax), %xmm12, %xmm13 +