From fb4d106de139f23d00cc9da5d98af97176c63a55 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 11 Aug 2015 20:19:23 +0000 Subject: [PATCH] [x86] enable machine combiner reassociations for 128-bit vector single/double multiplies git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244657 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 8 +++-- test/CodeGen/X86/machine-combiner.ll | 44 ++++++++++++++++++++++++++++ test/CodeGen/X86/sqrt-fastmath.ll | 4 +-- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index d79806e664b..efa972ad9a9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -6395,12 +6395,16 @@ static bool isAssociativeAndCommutative(const MachineInstr &Inst) { case X86::ADDPSrr: case X86::ADDSDrr: case X86::ADDSSrr: + case X86::MULPDrr: + case X86::MULPSrr: + case X86::MULSDrr: + case X86::MULSSrr: case X86::VADDPDrr: case X86::VADDPSrr: case X86::VADDSDrr: case X86::VADDSSrr: - case X86::MULSDrr: - case X86::MULSSrr: + case X86::VMULPDrr: + case X86::VMULPSrr: case X86::VMULSDrr: case X86::VMULSSrr: return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; diff --git a/test/CodeGen/X86/machine-combiner.ll b/test/CodeGen/X86/machine-combiner.ll index af1c661417c..fb21c19744f 100644 --- a/test/CodeGen/X86/machine-combiner.ll +++ b/test/CodeGen/X86/machine-combiner.ll @@ -254,3 +254,47 @@ define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, ret <2 x double> %t2 } +; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated. 
+ +define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; SSE-LABEL: reassociate_muls_v4f32: +; SSE: # BB#0: +; SSE-NEXT: addps %xmm1, %xmm0 +; SSE-NEXT: mulps %xmm3, %xmm2 +; SSE-NEXT: mulps %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: reassociate_muls_v4f32: +; AVX: # BB#0: +; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1 +; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %t0 = fadd <4 x float> %x0, %x1 + %t1 = fmul <4 x float> %x2, %t0 + %t2 = fmul <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + +; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated. + +define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { +; SSE-LABEL: reassociate_muls_v2f64: +; SSE: # BB#0: +; SSE-NEXT: addpd %xmm1, %xmm0 +; SSE-NEXT: mulpd %xmm3, %xmm2 +; SSE-NEXT: mulpd %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: reassociate_muls_v2f64: +; AVX: # BB#0: +; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1 +; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %t0 = fadd <2 x double> %x0, %x1 + %t1 = fmul <2 x double> %x2, %t0 + %t2 = fmul <2 x double> %x3, %t1 + ret <2 x double> %t2 +} + diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll index 9b851db8121..daa6d7292e7 100644 --- a/test/CodeGen/X86/sqrt-fastmath.ll +++ b/test/CodeGen/X86/sqrt-fastmath.ll @@ -99,8 +99,8 @@ define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 { ; ESTIMATE-LABEL: reciprocal_square_root_v4f32: ; ESTIMATE: # BB#0: ; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1 -; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2 -; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0 +; ESTIMATE-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; ESTIMATE-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 ; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1 ; 
ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0 -- 2.34.1