field dag Outs = (outs DstRC:$dst);
+ // VOP3b instructions are a special case with a second explicit
+ // output. This is manually overridden for them.
+ field dag Outs32 = Outs;
+ field dag Outs64 = Outs;
+
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+
+class VOP2b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, untyped]> {
+ let Asm32 = "$dst, vcc, $src0, $src1";
+ let Asm64 = "$dst, $sdst, $src0, $src1";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+}
+
+def VOP2b_I32_I1_I32_I32 : VOP2b_Profile<i32>;
+
+def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile<i32> {
let Src0RC32 = VCSrc_32;
}
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
- // The VOP2 variant puts the carry out into VCC, the VOP3 variant
- // can write it into any SGPR. We currently don't use the carry out,
- // so for now hardcode it to VCC as well.
- let sdst = SIOperand.VCC, Defs = [VCC] in {
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
+ def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
- } // End sdst = SIOperand.VCC, Defs = [VCC]
+ def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods>;
}
multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
opName, revOp, P.HasModifiers>;
}
-multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
+multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
string revOp, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+ defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
+ outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2b_Helper <
- op, opName, P.Outs,
+ op, opName, P.Outs32, P.Outs64,
P.Ins32, P.Asm32, [],
P.Ins64, P.Asm64,
!if(P.HasModifiers,
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP_I32_I32_I32, add
+ VOP2b_I32_I1_I32_I32
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP_I32_I32_I32, null_frag, "v_sub_i32"
+ VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
>;
let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_VCC
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_VCC
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+ VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
>;
} // End Uses = [VCC]
;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; to interpret:
; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
-; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
-; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
; CHECK: BB0_1:
; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
-; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
; constant offsets.
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}fold_sgpr:
-; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
entry:
%tmp0 = icmp ne i32 %fold, 0
; GCN-LABEL: {{^}}legal_offset_fi_offset
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
; FUNC-LABEL: {{^}}shl_2_add_9_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
}
; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; FUNC-LABEL: {{^}}shl_2_add_999_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[THIRD_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], vcc, [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FOURTH_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
-; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
-; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
; ModuleID = 'vop-shrink.ll'
v_mul_i32_i24_e64 v1, v2, 100
// CHECK: error: invalid operand for instruction
+v_add_i32_e32 v1, s[0:1], v2, v3
+// CHECK: error: invalid operand for instruction
+
// TODO: Constant bus restrictions
// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
v_mbcnt_hi_u32_b32 v1, v2, v3
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_i32 v1, vcc, v2, v3
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32 v1, s[0:1], v2, v3
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_i32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, s[0:1], v2, v3
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, vcc, v2, v3
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_u32 v1, vcc, v2, v3
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_u32 v1, s[0:1], v2, v3
-// SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_i32 v1, vcc, v2, v3
-// SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, v2, v3
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_i32 v1, s[0:1], v2, v3
-// SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_u32 v1, vcc, v2, v3
+
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_i32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_i32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_u32 v1, s[0:1], v2, v3
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3
+
+// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
+v_addc_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3
+
+// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
+v_subb_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
+v_subbrev_u32 v1, s[0:1], v2, v3
// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]