From: Bruno Cardoso Lopes Date: Tue, 22 Jun 2010 23:02:38 +0000 (+0000) Subject: Reapply support for AVX unpack and interleave instructions, with X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0caca3967b6547a9c97bd5e10ee3babb345a9979;p=oota-llvm.git Reapply support for AVX unpack and interleave instructions, with testcases this time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106593 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a9633d22f90..b49c75d3091 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -444,6 +444,22 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, RC:$src1, (mem_frag addr:$src2)))], d>; } +/// sse12_unpack_interleave - SSE 1 & 2 unpack and interleave +multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, + PatFrag mem_frag, RegisterClass RC, + X86MemOperand x86memop, string asm, + Domain d> { + def rr : PI<opc, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2), + asm, [(set RC:$dst, + (vt (OpNode RC:$src1, RC:$src2)))], d>; + def rm : PI<opc, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + asm, [(set RC:$dst, + (vt (OpNode RC:$src1, + (mem_frag addr:$src2))))], d>; +} + //===----------------------------------------------------------------------===// // SSE1 Instructions //===----------------------------------------------------------------------===// @@ -1388,50 +1404,32 @@ let Constraints = "$src1 = $dst" in { VR128:$src1, (memopv2f64 addr:$src2))))]>; let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; - - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : 
PSI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; - def UNPCKHPDrr : PDI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPDrm : PDI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, - (memopv2f64 addr:$src2))))]>; - - def UNPCKLPDrr : PDI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPDrm : PDI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpcklpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; + let Constraints = "", isAsmParserOnly = 1 in { + defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + } + defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, 
v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; } // AddedComplexity } // Constraints = "$src1 = $dst" diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 199f0bb32fd..201f6862086 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10318,3 +10318,35 @@ // CHECK: encoding: [0xc5,0xeb,0x10,0xec] vmovsd %xmm4, %xmm2, %xmm5 +// CHECK: vunpckhps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x15,0xe1] + vunpckhps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x15,0xe1] + vunpckhpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklps %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe8,0x14,0xe1] + vunpcklps %xmm1, %xmm2, %xmm4 + +// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0xc5,0xe9,0x14,0xe1] + vunpcklpd %xmm1, %xmm2, %xmm4 + +// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc] + vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc] + vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc] + vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5 + +// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc] + vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index e1bb16b81c2..8c0085cfdfb 100644 
--- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -382,3 +382,35 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: fixup A - offset: 5, value: CPI1_0-4 pshufb CPI1_0(%rip), %xmm1 +// CHECK: vunpckhps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef] + vunpckhps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef] + vunpckhpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklps %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef] + vunpcklps %xmm15, %xmm12, %xmm13 + +// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef] + vunpcklpd %xmm15, %xmm12, %xmm13 + +// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc] + vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc] + vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc] + vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15 + +// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc] + vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15 +