return Chain;
}
+/// Usually ISel will insert a copy between a terminator instruction that
+/// outputs a value and the S_BRANCH* at the end of the block. This causes
+/// MachineBasicBlock::getFirstTerminator() to return the incorrect value,
+/// so we want to make sure there are no copies between terminators at the
+/// end of blocks.
+static void LowerTerminatorWithOutput(unsigned Opcode, MachineBasicBlock *BB,
+ MachineInstr *MI,
+ const TargetInstrInfo *TII,
+ MachineRegisterInfo &MRI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ // Usually ISel will insert a copy between the *_NON_TERM pseudo (e.g.
+ // SI_IF_NON_TERM) and the S_BRANCH* terminator. We want to replace the
+ // pseudo with the real terminator (e.g. SI_IF), and we can't have any
+ // instructions between it and S_BRANCH*, since they are both terminators.
+ assert(MRI.hasOneUse(DstReg));
+ MachineOperand &Use = *MRI.use_begin(DstReg);
+ MachineInstr *UseMI = Use.getParent();
+ assert(UseMI->getOpcode() == AMDGPU::COPY);
+
+ MRI.replaceRegWith(UseMI->getOperand(0).getReg(), DstReg);
+ UseMI->eraseFromParent();
+ BuildMI(*BB, BB->getFirstTerminator(), MI->getDebugLoc(),
+ TII->get(Opcode))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2));
+ MI->eraseFromParent();
+}
+
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::SI_ADDR64_RSRC: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned SuperReg = MI->getOperand(0).getReg();
unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
MI->eraseFromParent();
break;
}
- case AMDGPU::V_SUB_F64: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ case AMDGPU::SI_IF_NON_TERM:
+ LowerTerminatorWithOutput(AMDGPU::SI_IF, BB, MI, TII, MRI);
+ break;
+ case AMDGPU::SI_ELSE_NON_TERM:
+ LowerTerminatorWithOutput(AMDGPU::SI_ELSE, BB, MI, TII, MRI);
+ break;
+ case AMDGPU::V_SUB_F64:
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addImm(2); /* NEG */
MI->eraseFromParent();
break;
- }
+
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineInstrBuilder MIB =
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
Uses = [EXEC], Defs = [EXEC] in {
+let usesCustomInserter = 1 in {
+
+def SI_IF_NON_TERM : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, brtarget:$target), "",
+ [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
+>;
+
+def SI_ELSE_NON_TERM : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "",
+ [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
+> {
+ let Constraints = "$src = $dst";
+}
+
+} // usesCustomInserter = 1
+
let isBranch = 1, isTerminator = 1 in {
-def SI_IF : InstSI <
+def SI_IF: InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target),
- "SI_IF $dst, $vcc, $target",
- [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
+ "", []
>;
def SI_ELSE : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
- "SI_ELSE $dst, $src, $target",
- [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
-
+ "", []
+> {
let Constraints = "$src = $dst";
}
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
; Test that codegenprepare understands address space sizes
; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
declare i32 @llvm.SI.tid() readnone
-; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
; CHECK: error: unsupported call to function defined_function in test_call
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @anyext_load_i8:
; EG: AND_INT
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @extract_vector_elt_v2i16
; SI: BUFFER_LOAD_USHORT
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space:
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @dynamic_insertelement_v2f64:
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK-LABEL: @sqrt_f32
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_copy_i1_to_i1
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @local_i32_load
; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
; XFAIL: *
; REQUIRES: asserts
-; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI
+; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: @setcc_v2i32
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; XXX: Merge this into setcc, once R600 supports 64-bit operations
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; Copy VGPR -> SGPR used twice as an instruction operand, which is then
; used in an REG_SEQUENCE that also needs to be handled.
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
; SI-LABEL: @global_store_v3i64:
; SI: BUFFER_STORE_DWORDX4
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_truncstore_i32_to_i1
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-;XUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: @test_udiv
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @unaligned_load_store_i32:
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]