From 3e38856f04a01651819c6bc16fac4434a5d2b4c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 18 Nov 2013 20:09:29 +0000 Subject: [PATCH] R600/SI: Move patterns to match add / sub to scalar instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.cpp | 4 ++++ lib/Target/R600/SIInstructions.td | 22 ++++++++++--------- .../R600/32-bit-local-address-space.ll | 10 +++++---- test/CodeGen/R600/address-space.ll | 7 +++--- test/CodeGen/R600/gep-address-space.ll | 14 ++++++------ test/CodeGen/R600/rotr.ll | 13 ++++++----- 6 files changed, 40 insertions(+), 30 deletions(-) diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 9d8dff19c63..a5d4e1ab060 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -373,6 +373,10 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; case AMDGPU::PHI: return AMDGPU::PHI; + case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32; + case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; + case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32; + case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e10c040f5e5..7747d6ae12a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -974,16 +974,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC -defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", - [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] ->; - -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", - [(set i32:$dst, (sub i32:$src0, i32:$src1))] ->; +// No patterns so that the scalar instructions are always selected. +// The scalar versions will be replaced with vector when needed later. +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -let Uses = [VCC] in { // Carry-out comes from VCC +let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; @@ -1131,8 +1128,13 @@ def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; -def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; -def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; +def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", + [(set i32:$dst, (add i32:$src0, i32:$src1))] +>; +def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", + [(set i32:$dst, (sub i32:$src0, i32:$src1))] +>; + def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index 63909f0b094..7a126878bef 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -20,8 +20,9 @@ entry: } ; CHECK-LABEL: @local_address_gep -; CHECK: V_ADD_I32_e{{32|64}} [[PTR:v[0-9]]] -; CHECK: DS_READ_B32 [[PTR]] +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_READ_B32 [[VPTR]] define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) { entry: %0 = getelementptr i32 addrspace(3)* %in, i32 %offset @@ -31,8 +32,9 @@ entry: } ; CHECK-LABEL: @local_address_gep_const_offset -; CHECK: V_ADD_I32_e{{32|64}} [[PTR:v[0-9]]] -; CHECK: DS_READ_B32 [[PTR]] +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_READ_B32 [[VPTR]] define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %0 = getelementptr i32 addrspace(3)* %in, i32 1 diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index 633101f42ac..1fc616a4ed4 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -5,9 +5,10 @@ %struct.foo = type { [3 x float], [3 x float] } ; CHECK-LABEL: @do_as_ptr_calcs: -; CHECK: V_ADD_I32_e64 {{v[0-9]+}}, -; CHECK: V_ADD_I32_e64 [[REG1:v[0-9]+]], -; CHECK: DS_READ_B32 [[REG1]], +; CHECK: S_ADD_I32 {{s[0-9]+}}, +; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]], +; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]] +; CHECK: DS_READ_B32 [[VREG1]], define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index 934b5a5956e..4ea21dde8a0 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -2,7 +2,7 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL @use_gep_address_space: -; CHECK: ADD_I32 +; CHECK: S_ADD_I32 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16 store i32 99, i32 addrspace(3)* %p ret void @@ -10,10 +10,10 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: @gep_as_vector_v4: -; CHECK: V_ADD_I32 -; CHECK: V_ADD_I32 -; CHECK: V_ADD_I32 -; CHECK: V_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 %p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1 @@ -28,8 +28,8 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: @gep_as_vector_v2: -; CHECK: V_ADD_I32 -; CHECK: V_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1 diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll index 0a68d7e16ef..edf7aeebea0 100644 --- a/test/CodeGen/R600/rotr.ll +++ b/test/CodeGen/R600/rotr.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -; R600-CHECK: @rotr +; R600-CHECK-LABEL: @rotr: ; R600-CHECK: BIT_ALIGN_INT -; SI-CHECK: @rotr +; SI-CHECK-LABEL: @rotr: ; SI-CHECK: V_ALIGNBIT_B32 define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: @@ -16,15 +16,16 @@ entry: ret void } -; R600-CHECK: @rotl +; R600-CHECK-LABEL: @rotl: ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x ; R600-CHECK-NEXT: 32 ; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} -; SI-CHECK: @rotl -; SI-CHECK: V_SUB_I32_e64 [[DST:v[0-9]+]], 32, {{[sv][0-9]+}} -; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [sv][0-9]+, v[0-9]+}}, [[DST]] +; SI-CHECK-LABEL: @rotl: +; SI-CHECK: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}} +; SI-CHECK: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]] +; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]] define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: %0 = shl i32 %x, %y -- 2.34.1