// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32);
+ RegisterOperand ret =
+ !if(!eq(VT.Size, 64),
+ VCSrc_64,
+ !if(!eq(VT.Value, i1.Value),
+ SCSrc_64,
+ VCSrc_32
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
"$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
-
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-class VOP2b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, untyped]> {
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
let Asm32 = "$dst, vcc, $src0, $src1";
let Asm64 = "$dst, $sdst, $src0, $src1";
let Outs32 = (outs DstRC:$dst);
let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
}
-def VOP2b_I32_I1_I32_I32 : VOP2b_Profile<i32>;
-
-def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile<i32> {
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let Src0RC32 = VCSrc_32;
+ let Asm32 = "$dst, vcc, $src0, $src1, vcc";
+ let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
// VOPC instructions are a special case because for the 32-bit
// No VI instruction. This class is for SI only.
}
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
+// instead of an implicit VCC as in the VOP2b format.
+multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit useSGPRInput = 0,
+ bit UseFullOp = 0> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSGPRInput, HasMods>;
def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSGPRInput, HasMods>;
}
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
+ string revOp, bit HasMods, bit useSGPRInput> {
- defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+ }
- defm _e64 : VOP3b_2_m <op,
- outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
+ defm _e64 : VOP3b_2_3_m <op,
+ outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
>;
}
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp, P.HasModifiers, !eq(P.NumSrcArgs, 3)
>;
// A VOP2 instruction that is VOP3-only on VI.
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
string opName, list<dag> pattern> :
- VOP3b_3_m <
+ VOP3b_2_3_m <
op, (outs vrc:$vdst, SReg_64:$sdst),
(ins InputModsNoDefault:$src0_modifiers, arc:$src0,
InputModsNoDefault:$src1_modifiers, arc:$src1,
InputModsNoDefault:$src2_modifiers, arc:$src2,
ClampMod:$clamp, omod:$omod),
opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
- opName, opName, 1, 1
+ opName, opName, 1, 0, 1
>;
multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
v_subrev_u32 v1, s[0:1], v2, v3
-// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, vcc, v2, v3
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3, vcc
-// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
-v_addc_u32 v1, s[0:1], v2, v3
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32_e32 v1, vcc, v2, v3, vcc
-// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, vcc, v2, v3
-// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
-v_subb_u32 v1, s[0:1], v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32 v1, s[0:1], v2, v3, vcc
-// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, vcc, v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
-// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
-v_subbrev_u32 v1, s[0:1], v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, s[0:1], v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, vcc, v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32_e64 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01]
+v_subb_u32 v1, s[0:1], v2, v3, vcc
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
+v_subbrev_u32 v1, s[0:1], v2, v3, vcc
// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]