From: Sanjay Patel
Date: Wed, 8 Jul 2015 22:35:20 +0000 (+0000)
Subject: [x86] enable machine combiner reassociations for scalar single-precision multiplies
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f769239c19643e28740a1b89b2e6a2acc96978a6;p=oota-llvm.git

[x86] enable machine combiner reassociations for scalar single-precision multiplies

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241752 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dd2d04dbe49..5484ae91855 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -6405,11 +6405,13 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
 
 // TODO: There are many more machine instruction opcodes to match:
 //       1. Other data types (double, integer, vectors)
-//       2. Other math / logic operations (mul, and, or)
+//       2. Other math / logic operations (and, or)
 static bool isAssociativeAndCommutative(unsigned Opcode) {
   switch (Opcode) {
-  case X86::VADDSSrr:
   case X86::ADDSSrr:
+  case X86::VADDSSrr:
+  case X86::MULSSrr:
+  case X86::VMULSSrr:
     return true;
   default:
     return false;
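The reassociation that the new MULSSrr/VMULSSrr cases unlock targets chains like ((x0 / x1) * x2) * x3, where every multiply waits on the high-latency divide; re-pairing the operands as (x0 / x1) * (x2 * x3) lets the independent multiply issue while the divide is still in flight. A minimal IR sketch of that shape (not part of the patch; the function name is made up, and the machine combiner only fires under the unsafe-fp-math configuration these tests run with):

; Critical path before reassociation: fdiv -> fmul -> fmul (fully serial).
; After re-pairing, %x2 * %x3 can execute in parallel with the fdiv.
define float @mul_chain_sketch(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1      ; long-latency op feeding the chain
  %t1 = fmul float %x2, %t0      ; depends on the divide
  %t2 = fmul float %x3, %t1      ; serializes behind %t1 until reassociated
  ret float %t2
}

The new reassociate_muls1 test below checks exactly this pattern: the SSE output multiplies %xmm3 into %xmm2 before touching the divss result.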
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
index 279bb0624ac..8adfde7f3f3 100644
--- a/test/CodeGen/X86/fdiv-combine.ll
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
 
 ; Anything more than one division using a single divisor operand
 ; should be converted into a reciprocal and multiplication.
@@ -17,9 +17,9 @@ define float @div2_arcp(float %x, float %y, float %z) #0 {
 ; CHECK: # BB#0:
 ; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; CHECK-NEXT: divss %xmm2, %xmm3
-; CHECK-NEXT: mulss %xmm3, %xmm0
 ; CHECK-NEXT: mulss %xmm1, %xmm0
 ; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm3, %xmm0
 ; CHECK-NEXT: retq
   %div1 = fdiv arcp float %x, %z
   %mul = fmul arcp float %div1, %y
diff --git a/test/CodeGen/X86/machine-combiner.ll b/test/CodeGen/X86/machine-combiner.ll
index 545decb0311..2286da7e94d 100644
--- a/test/CodeGen/X86/machine-combiner.ll
+++ b/test/CodeGen/X86/machine-combiner.ll
@@ -144,3 +144,24 @@ define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
   ret float %t2
 }
 
+; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
+
+define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
+; SSE-LABEL: reassociate_muls1:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: mulss %xmm3, %xmm2
+; SSE-NEXT: mulss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_muls1:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %t0 = fdiv float %x0, %x1
+  %t1 = fmul float %x2, %t0
+  %t2 = fmul float %x3, %t1
+  ret float %t2
+}
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 0f8d9f4d713..9b851db8121 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -34,11 +34,11 @@ define float @ff(float %f) #0 {
 ; ESTIMATE: # BB#0:
 ; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
 ; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
-; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
-; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm3
+; ESTIMATE-NEXT: vmulss %xmm3, %xmm1, %xmm1
 ; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm2, %xmm2
 ; ESTIMATE-NEXT: vmulss %xmm2, %xmm1, %xmm1
-; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm1
 ; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
 ; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0
@@ -78,7 +78,7 @@ define float @reciprocal_square_root(float %x) #0 {
 ; ESTIMATE: # BB#0:
 ; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
 ; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
-; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
 ; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0
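The sqrt-fastmath churn is the same transform showing through the Newton-Raphson rsqrt expansion: the estimate sequence ends in a chain of dependent vmulss instructions, and the combiner now pairs their operands differently, so the CHECK lines move around while computing the same value. A sketch of that refinement step, written directly in IR for illustration (the function name is made up, and the constants follow the textbook formulation sqrt(x) ~= x * est * (1.5 - 0.5 * x * est * est), not necessarily the exact DAG the backend builds):

define float @nr_sqrt_step_sketch(float %x, float %est) {
  %xe   = fmul float %x, %est               ; x * est
  %xee  = fmul float %xe, %est              ; x * est * est
  %half = fmul float %xee, 5.000000e-01     ; 0.5 * x * est * est
  %sub  = fsub float 1.500000e+00, %half    ; 1.5 - 0.5 * x * est^2
  %ref  = fmul float %xe, %sub              ; refined estimate of sqrt(x)
  ret float %ref
}

Re-pairing the multiplies in this chain is only sound because the whole expansion is emitted under fast-math assumptions; strict IEEE ordering would forbid the reassociation.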