From 34a491bd8ea20670e9c6cb02cbd0388d469bf97f Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 24 Jun 2010 00:15:50 +0000 Subject: [PATCH] Move SSE and AVX shuffle, unpack and compare code to more appropriate places git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106702 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 351 ++++++++++++++++++---------------- 1 file changed, 185 insertions(+), 166 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5a4b4ba3750..3ea7ca99434 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -444,43 +444,6 @@ multiclass sse12_fp_packed_int opc, string OpcodeStr, RegisterClass RC, RC:$src1, (mem_frag addr:$src2)))], d>; } -/// sse12_unpack_interleave - SSE 1 & 2 unpack and interleave -multiclass sse12_unpack_interleave opc, PatFrag OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : PI; - def rm : PI; -} - -multiclass sse12_cmp { - def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; - def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; -} - -// FIXME: rename instructions to only use the class above -multiclass sse12_cmp_alt { - def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$src2), asm, - [], d>; - def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$src2), asm, - [], d>; -} - //===----------------------------------------------------------------------===// // SSE1 Instructions //===----------------------------------------------------------------------===// @@ -741,6 +704,27 @@ let Constraints = "$src1 = $dst" in { // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// +multiclass sse12_cmp { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; +} + +// FIXME: rename instructions to only use the class above +multiclass sse12_cmp_alt { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$src2), asm, + [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, sse_imm_op:$src2), asm, + [], d>; +} + // Comparison instructions let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { def CMPSSrr : SSIi8<0xC2, MRMSrcReg, @@ -860,6 +844,170 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), (load addr:$src2)))]>; } // Defs = [EFLAGS] +let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp, + TB; + defm CMPPD : sse12_cmp, + TB, OpSize; +} +let isAsmParserOnly = 1 in { + defm VCMPPS : sse12_cmp, VEX_4V; + defm VCMPPD : sse12_cmp, OpSize, VEX_4V; +} + +let isAsmParserOnly = 1, Pattern = [] in { + // Accept explicit immediate argument form instead of comparison code. + let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp_alt, TB; + defm CMPPD : sse12_cmp_alt, TB, OpSize; + } + defm VCMPPS : sse12_cmp_alt, VEX_4V; + defm VCMPPD : sse12_cmp_alt, OpSize, VEX_4V; +} + +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Shuffle Instructions +//===----------------------------------------------------------------------===// + +/// sse12_shuffle - sse 1 & 2 shuffle instructions +multiclass sse12_shuffle { + def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, + [(set VR128:$dst, (vt (shufp:$src3 + VR128:$src1, (mem_frag addr:$src2))))], d>; + let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, + [(set VR128:$dst, + (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; +} + +let isAsmParserOnly = 1 in { + defm VSHUFPS : sse12_shuffle, VEX_4V; + defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + defm SHUFPS : sse12_shuffle, + TB; + defm SHUFPD : sse12_shuffle, TB, OpSize; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Unpack Instructions +//===----------------------------------------------------------------------===// + +/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave +multiclass sse12_unpack_interleave opc, PatFrag OpNode, ValueType vt, + PatFrag mem_frag, RegisterClass RC, + X86MemOperand x86memop, string asm, + Domain d> { + def rr : PI; + def rm : PI; +} + +let AddedComplexity = 10 in { + let isAsmParserOnly = 1 in { + defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + } + + let Constraints = "$src1 = $dst" in { + defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + } // Constraints = "$src1 = $dst" +} // AddedComplexity + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Extract Floating-Point Sign mask +//===----------------------------------------------------------------------===// + +/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave +multiclass sse12_extr_sign_mask { + def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set GR32:$dst, (Int RC:$src))], d>; +} + +// Mask creation +defm MOVMSKPS : sse12_extr_sign_mask, TB; +defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; + +let isAsmParserOnly = 1 in { + defm VMOVMSKPS : sse12_extr_sign_mask, VEX; + defm VMOVMSKPD : sse12_extr_sign_mask, OpSize, + VEX; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions +//===----------------------------------------------------------------------===// + // Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have // names that start with 'Fs'. @@ -1348,135 +1496,6 @@ defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt, defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>; -// Compare -let Constraints = "$src1 = $dst" in { - defm CMPPS : sse12_cmp, - TB; - defm CMPPD : sse12_cmp, - TB, OpSize; -} -let isAsmParserOnly = 1 in { - defm VCMPPS : sse12_cmp, VEX_4V; - defm VCMPPD : sse12_cmp, OpSize, VEX_4V; -} - -let isAsmParserOnly = 1, Pattern = [] in { - // Accept explicit immediate argument form instead of comparison code. - let Constraints = "$src1 = $dst" in { - defm CMPPS : sse12_cmp_alt, TB; - defm CMPPD : sse12_cmp_alt, TB, OpSize; - } - defm VCMPPS : sse12_cmp_alt, VEX_4V; - defm VCMPPD : sse12_cmp_alt, OpSize, VEX_4V; -} - -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; - -// Shuffle and unpack instructions -multiclass sse12_shuffle { - def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, - [(set VR128:$dst, (vt (shufp:$src3 - VR128:$src1, (mem_frag addr:$src2))))], d>; - let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in - def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, - [(set VR128:$dst, - (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; -} - -let Constraints = "$src1 = $dst" in { - defm SHUFPS : sse12_shuffle, - TB; - defm SHUFPD : sse12_shuffle, TB, OpSize; - - let Constraints = "", isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle, VEX_4V; - defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; - } - - let AddedComplexity = 10 in { - let Constraints = "", isAsmParserOnly = 1 in { - defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - } - defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; - defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; - defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; - defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - -multiclass sse12_extr_sign_mask { - def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], d>; -} - -// Mask creation -defm MOVMSKPS : sse12_extr_sign_mask, TB; -defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; - -let isAsmParserOnly = 1 in { - defm VMOVMSKPS : sse12_extr_sign_mask, VEX; - defm VMOVMSKPD : sse12_extr_sign_mask, OpSize, - VEX; -} - // Prefetch intrinsic. def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; -- 2.34.1