From 7d8f1710a314e5bbc580a9e210d7dc6fa1b50510 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 17 Oct 2014 18:00:48 +0000
Subject: [PATCH] R600/SI: Allow commuting with source modifiers

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220066 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review note (not part of the commit message): this copy guards the
src0/src1 modifier swap with a null check on Src1Mods, because the
preceding bail-out only covers a missing src1_modifiers operand when src0
has modifiers set. Leading whitespace in this patch was reconstructed
following LLVM formatting conventions; apply with --ignore-whitespace if
the context does not match exactly.

 lib/Target/R600/SIInstrInfo.cpp        | 37 +++++++++++++------
 test/CodeGen/R600/commute_modifiers.ll | 50 ++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 11 deletions(-)
 create mode 100644 test/CodeGen/R600/commute_modifiers.ll

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 691e108442b..ae8a23e6fdb 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -718,19 +718,34 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
       return nullptr;
     }
 
-    // XXX: Commute VOP3 instructions with abs and neg set .
-    const MachineOperand *Src0Mods = getNamedOperand(*MI,
-                                          AMDGPU::OpName::src0_modifiers);
-    const MachineOperand *Src1Mods = getNamedOperand(*MI,
-                                          AMDGPU::OpName::src1_modifiers);
-    const MachineOperand *Src2Mods = getNamedOperand(*MI,
-                                          AMDGPU::OpName::src2_modifiers);
-
-    if ((Src0Mods && Src0Mods->getImm()) ||
-        (Src1Mods && Src1Mods->getImm()) ||
-        (Src2Mods && Src2Mods->getImm()))
+    // TODO: Is there any reason to commute with src2 modifiers?
+    // TODO: Should be able to commute with output modifiers just fine.
+    if (hasModifiersSet(*MI, AMDGPU::OpName::src2_modifiers))
       return nullptr;
 
+    // Be sure to copy the source modifiers to the right place.
+    if (MachineOperand *Src0Mods
+          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+      MachineOperand *Src1Mods
+        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
+
+      int Src0ModsVal = Src0Mods->getImm();
+      if (!Src1Mods && Src0ModsVal != 0)
+        return nullptr;
+
+      // The check above only rejects a missing src1_modifiers operand when
+      // src0 has modifiers set, so Src1Mods may still be null: skip the swap.
+      if (Src1Mods) {
+        // XXX - This assert might be a lie. It might be useful to have a neg
+        // modifier with 0.0.
+        int Src1ModsVal = Src1Mods->getImm();
+        assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
+
+        Src1Mods->setImm(Src0ModsVal);
+        Src0Mods->setImm(Src1ModsVal);
+      }
+    }
+
     unsigned Reg = Src0.getReg();
     unsigned SubReg = Src0.getSubReg();
     if (Src1.isImm())
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll
new file mode 100644
index 00000000000..97038f19003
--- /dev/null
+++ b/test/CodeGen/R600/commute_modifiers.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+
+; FUNC-LABEL: @commute_add_imm_fabs_f32
+; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: V_ADD_F32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %z = fadd float 2.0, %x.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
+; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: V_MUL_F32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
+  %z = fmul float 4.0, %x.fneg.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_f32
+; SI: BUFFER_LOAD_DWORD [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: V_MUL_F32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
+; SI-NEXT: BUFFER_STORE_DWORD [[REG]]
+define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fneg = fsub float -0.000000e+00, %x
+  %z = fmul float 4.0, %x.fneg
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
-- 
2.34.1