From: Matt Arsenault Date: Thu, 13 Nov 2014 20:44:23 +0000 (+0000) Subject: R600/SI: Use s_movk_i32 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=01ab7a869d06efaa0949e9f6bac16a293115ceb9;p=oota-llvm.git R600/SI: Use s_movk_i32 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221922 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 6e098cd1dc1..b84a2b12149 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -368,12 +368,12 @@ class SOPC_64 op, string opName, PatLeaf cond = COND_NULL> : SOPC_Helper; class SOPK_32 op, string opName, list pattern> : SOPK < - op, (outs SReg_32:$dst), (ins i16imm:$src0), + op, (outs SReg_32:$dst), (ins u16imm:$src0), opName#" $dst, $src0", pattern >; class SOPK_64 op, string opName, list pattern> : SOPK < - op, (outs SReg_64:$dst), (ins i16imm:$src0), + op, (outs SReg_64:$dst), (ins u16imm:$src0), opName#" $dst, $src0", pattern >; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 59270ee062e..9702565c462 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -586,6 +586,8 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock::iterator Start = MBB.getFirstNonPHI(); const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32); + assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes)); + BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO) .addImm(StackOffset); diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp index 0b9e7ca9666..45e83f54e7c 100644 --- a/lib/Target/R600/SIShrinkInstructions.cpp +++ b/lib/Target/R600/SIShrinkInstructions.cpp @@ -189,6 +189,19 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; + // Try to use S_MOVK_I32, which will save 4 bytes for small immediates. + if (MI.getOpcode() == AMDGPU::S_MOV_B32) { + const MachineOperand &Src = MI.getOperand(1); + + // TODO: Handle FPImm? + if (Src.isImm()) { + if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src)) { + MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); + continue; + } + } + } + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll index c147bd128d9..fc5af7c889e 100644 --- a/test/CodeGen/R600/flat-address-space.ll +++ b/test/CodeGen/R600/flat-address-space.ll @@ -156,8 +156,8 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ; Check for prologue initializing special SGPRs pointing to scratch. ; CHECK-LABEL: {{^}}store_flat_scratch: ; CHECK: s_movk_i32 flat_scratch_lo, 0 -; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 40 -; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0 +; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} +; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} ; CHECK: flat_store_dword ; CHECK: s_barrier ; CHECK: flat_load_dword diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 99dedac809e..b7493d3fb49 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -116,7 +116,7 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, ; SI-LABEL: {{^}}vector_or_i64_loadimm: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x146f +; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll new file mode 100644 index 00000000000..71f9a4120e8 --- /dev/null +++ b/test/CodeGen/R600/s_movk_i32.ll @@ -0,0 +1,184 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: {{^}}s_movk_i32_k0: +; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k1: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k2: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k3: +; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k4: +; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k5: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k6: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 270582939713 ; 65 | (63 << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k7: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}} +; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32) + store i64 %or, i64 addrspace(1)* %out + ret void +} + + +; SI-LABEL: {{^}}s_movk_i32_k8: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000 + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k9: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001 + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k10: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888 + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k11: +; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_movk_i32_k12: +; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}} +; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}} +; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI: s_endpgm +define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64 addrspace(1)* %a, align 4 + %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001 + store i64 %or, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll index 23118f9a954..1c7df1608b5 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/R600/smrd.ll @@ -24,7 +24,7 @@ entry: ; SMRD load with an offset greater than the largest possible immediate. ; CHECK-LABEL: {{^}}smrd2: -; CHECK: s_mov_b32 s[[OFFSET:[0-9]]], 0x400 +; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 ; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] ; CHECK: s_endpgm define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { @@ -81,7 +81,7 @@ main_body: ; largets possible immediate. ; immediate offset. ; CHECK-LABEL: {{^}}smrd_load_const2: -; CHECK: s_mov_b32 s[[OFFSET:[0-9]]], 0x400 +; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 ; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: