From 788184365a4a949eb83b0036045e6793bc6c20f0 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 24 Jun 2010 20:48:23 +0000 Subject: [PATCH] - Add AVX COMI{SS,SD}{rr,rm} and UCOMI{SS,SD}{rr,rm}. - Fix a small VEX encoding issue. - Move compare instructions to their appropriate place. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106787 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 269 +++++++++++------------- lib/Target/X86/X86MCCodeEmitter.cpp | 33 +-- test/MC/AsmParser/X86/x86_32-encoding.s | 32 +++ test/MC/AsmParser/X86/x86_64-encoding.s | 33 +++ 4 files changed, 203 insertions(+), 164 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 1afda4a2180..82b056ace10 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -484,14 +484,6 @@ let Constraints = "$src1 = $dst" in { (loadi32 addr:$src2)))]>; } -// Compare Instructions -let Defs = [EFLAGS] in { -def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; -def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; -} // Defs = [EFLAGS] - //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Instructions //===----------------------------------------------------------------------===// @@ -704,97 +696,105 @@ let Constraints = "$src1 = $dst" in { // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// -multiclass sse12_cmp { - def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; - def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; -} - -// FIXME: rename instructions to only use the class above -multiclass sse12_cmp_alt { - def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$src2), asm, - [], d>; - def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$src2), asm, - [], d>; -} - +// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar { + string asm, string asm_alt> { def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$cc), + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, []>; let mayLoad = 1 in def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, sse_imm_op:$cc), - asm, []>; -} - -// FIXME: rename instructions to only use the class above -multiclass sse12_cmp_scalar_alt { - def rr_alt : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$src2), - asm, []>; - let mayLoad = 1 in - def rm_alt : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, sse_imm_op:$src2), + (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), asm, []>; -} - -let neverHasSideEffects = 1, isAsmParserOnly = 1 in { - defm VCMPSS : sse12_cmp_scalar, - XS, VEX_4V; - defm VCMPSD : sse12_cmp_scalar, - XD, VEX_4V; - // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1 in { - defm VCMPSS : sse12_cmp_scalar_alt, - XS, VEX_4V; - defm VCMPSD : sse12_cmp_scalar_alt, - XD, VEX_4V; + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), + asm_alt, []>; } } +let neverHasSideEffects = 1, isAsmParserOnly = 1 in { + defm VCMPSS : sse12_cmp_scalar, + XS, VEX_4V; + defm VCMPSD : sse12_cmp_scalar, + XD, VEX_4V; +} + let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - defm CMPSS : sse12_cmp_scalar, XS; - defm CMPSD : sse12_cmp_scalar, XD; + defm CMPSS : sse12_cmp_scalar, XS; + defm CMPSD : sse12_cmp_scalar, XD; +} + +// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS +multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, + ValueType vt, X86MemOperand x86memop, + PatFrag ld_frag, string OpcodeStr, Domain d> { + def rr: PI; + def rm: PI; +} - // Accept explicit immediate argument form instead of comparison code. +let Defs = [EFLAGS] in { let isAsmParserOnly = 1 in { - defm CMPSS : sse12_cmp_scalar_alt, XS; - defm CMPSD : sse12_cmp_scalar_alt, XD; + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = [] in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, OpSize, VEX; + } + + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, "comisd", SSEPackedDouble>, OpSize, VEX; + } + defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, TB; + defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, TB, OpSize; + + let Pattern = [] in { + defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; } -} -let Defs = [EFLAGS] in { -def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; -def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; -def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; -def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; + defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, TB; + defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, TB, OpSize; + + defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). @@ -827,75 +827,46 @@ let Constraints = "$src1 = $dst" in { (load addr:$src), imm:$cc))]>; } -let Defs = [EFLAGS] in { -def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - (load addr:$src2)))]>; -def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - (load addr:$src2)))]>; -def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -let Constraints = "$src1 = $dst" in { - defm CMPPS : sse12_cmp, - TB; - defm CMPPD : sse12_cmp, - TB, OpSize; +// sse12_cmp_packed - sse 1 & 2 compared packed instructions +multiclass sse12_cmp_packed { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; + } } + let isAsmParserOnly = 1 in { - defm VCMPPS : sse12_cmp, VEX_4V; - defm VCMPPD : sse12_cmp, VEX_4V; + defm VCMPPD : sse12_cmp_packed, OpSize, VEX_4V; -} - -let isAsmParserOnly = 1, Pattern = [] in { - // Accept explicit immediate argument form instead of comparison code. - let Constraints = "$src1 = $dst" in { - defm CMPPS : sse12_cmp_alt, TB; - defm CMPPD : sse12_cmp_alt, TB, OpSize; - } - defm VCMPPS : sse12_cmp_alt, VEX_4V; - defm VCMPPD : sse12_cmp_alt, OpSize, VEX_4V; + SSEPackedDouble>, OpSize, VEX_4V; +} +let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp_packed, TB; + defm CMPPD : sse12_cmp_packed, TB, OpSize; } def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), @@ -1599,10 +1570,6 @@ def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; -def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; // SSE2 instructions with XS prefix def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index dbdc2b5915a..2223d549b49 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -364,6 +364,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return; + bool HasVEX_4V = false; + if ((TSFlags >> 32) & X86II::VEX_4V) + HasVEX_4V = true; + // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form // @@ -447,8 +451,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } unsigned NumOps = MI.getNumOperands(); - unsigned i = 0; - unsigned SrcReg = 0, SrcRegNum = 0; + unsigned i = 0, CurOp = 0; bool IsSrcMem = false; switch (TSFlags & X86II::FormMask) { @@ -456,9 +459,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::MRMSrcMem: IsSrcMem = true; case X86II::MRMSrcReg: - if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + if (MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; + CurOp++; // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the // range 0-7 and the difference between the 2 groups is given by the @@ -469,17 +473,20 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // VEX.VVVV => XMM9 => ~9 // // See table 4-35 of Intel AVX Programming Reference for details. - SrcReg = MI.getOperand(1).getReg(); - SrcRegNum = GetX86RegNum(MI.getOperand(1)); - if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) - SrcRegNum += 8; - - // The registers represented through VEX_VVVV should - // be encoded in 1's complement form. - if ((TSFlags >> 32) & X86II::VEX_4V) + if (HasVEX_4V) { + unsigned SrcReg = MI.getOperand(CurOp).getReg(); + unsigned SrcRegNum = GetX86RegNum(MI.getOperand(1)); + if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) + SrcRegNum += 8; + + // The registers represented through VEX_VVVV should + // be encoded in 1's complement form. VEX_4V = (~SrcRegNum) & 0xf; - i = 2; // Skip the VEX.VVVV operand. + CurOp++; + } + + i = CurOp; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index f9127f4bd06..992a2ee0161 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10654,3 +10654,35 @@ // CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: vucomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1] + vucomiss %xmm1, %xmm2 + +// CHECK: vucomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2e,0x10] + vucomiss (%eax), %xmm2 + +// CHECK: vcomiss %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1] + vcomiss %xmm1, %xmm2 + +// CHECK: vcomiss (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf8,0x2f,0x10] + vcomiss (%eax), %xmm2 + +// CHECK: vucomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1] + vucomisd %xmm1, %xmm2 + +// CHECK: vucomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2e,0x10] + vucomisd (%eax), %xmm2 + +// CHECK: vcomisd %xmm1, %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1] + vcomisd %xmm1, %xmm2 + +// CHECK: vcomisd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x2f,0x10] + vcomisd (%eax), %xmm2 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 2eca58df4d1..ef842c80ae1 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -710,3 +710,36 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: vucomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3] + vucomiss %xmm11, %xmm12 + +// CHECK: vucomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2e,0x20] + vucomiss (%rax), %xmm12 + +// CHECK: vcomiss %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3] + vcomiss %xmm11, %xmm12 + +// CHECK: vcomiss (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x78,0x2f,0x20] + vcomiss (%rax), %xmm12 + +// CHECK: vucomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3] + vucomisd %xmm11, %xmm12 + +// CHECK: vucomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2e,0x20] + vucomisd (%rax), %xmm12 + +// CHECK: vcomisd %xmm11, %xmm12 +// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3] + vcomisd %xmm11, %xmm12 + +// CHECK: vcomisd (%rax), %xmm12 +// CHECK: encoding: [0xc5,0x79,0x2f,0x20] + vcomisd (%rax), %xmm12 + + -- 2.34.1