let PrintMethod = PM;
}
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> { // i32 operand whose default operand list is (i32 -1)
+ let PrintMethod = "printSel"; // name of the printer method used for this operand
+}
+
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
// default to 0.
def LAST : InstFlag<"printLast", 1>;
+def FRAMEri : Operand<iPTR> { // pointer-typed operand composed of two sub-operands
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); // $ptr: R600_Reg32 register, $index: i32 immediate
+}
+
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
let Word1{31} = SRF_MODE_ALL;
}
+class TEX_WORD0 { // Bit layout of Word0, the first 32-bit word mixed into R600_TEX
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST; // fields are packed contiguously from bit 0 upward
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE; // bits 31-29 are not assigned by this class
+}
+
+class TEX_WORD1 { // Bit layout of Word1, the second 32-bit word mixed into R600_TEX
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL; // bit 8 is not assigned by this class
+ let Word1{11-9} = DST_SEL_X; // one 3-bit selector per destination channel (X, Y, Z, W)
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X; // one bit per coordinate channel (X, Y, Z, W)
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 { // Bit layout of Word2, the third 32-bit word mixed into R600_TEX
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X; // three 5-bit offsets occupy bits 14-0
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X; // one 3-bit selector per source channel (X, Y, Z, W)
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W; // all 32 bits of Word2 are assigned
+}
+
/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets. We currently handle this in R600MCCodeEmitter, but we may
InstR600 <0,
(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName,
+ !strconcat(" ", opName,
"$clamp $dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName,
+ !strconcat(" ", opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
InstR600 <0,
(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
- R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName, "$clamp $dst$dst_rel, "
+ !strconcat(" ", opName, "$clamp $dst$dst_rel, "
"$src0_neg$src0$src0_rel, "
"$src1_neg$src1$src1_rel, "
"$src2_neg$src2$src2_rel, "
+// Texture instruction class. Operands are named after the TEX_WORD* fields
+// they feed ($DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID); the remaining
+// word fields are pinned to constants below. Only Word0 and Word1 are placed
+// into Inst here.
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
- !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
+ (outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+ !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
pattern,
- itin>{
- let Inst {10-0} = inst;
+ itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ // Only the low 5 bits of the 11-bit opcode fit in TEX_INST.
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ // Identity destination swizzle: X<-0, Y<-1, Z<-2, W<-3.
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+ // RESOURCE_INDEX_MODE is mapped into Word0{26-25} and has no same-named
+ // operand, so it must be given a value here like its sibling fields;
+ // otherwise those encoding bits are left unresolved.
+ let RESOURCE_INDEX_MODE = 0;
+
+ let COORD_TYPE_X = 0;
+ let COORD_TYPE_Y = 0;
+ let COORD_TYPE_Z = 0;
+ let COORD_TYPE_W = 0;
}
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
}]
>;
+// Immediate leaf that matches only the texture-type codes 9, 10, 15 and 16.
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{
+ switch ((uint32_t)N->getZExtValue()) {
+ case 9:
+ case 10:
+ case 15:
+ case 16:
+ return true;
+ default:
+ return false;
+ }
+ }]
+>;
+
+// Immediate leaf that matches only the texture-type codes 11, 12 and 17.
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{
+ switch ((uint32_t)N->getZExtValue()) {
+ case 11:
+ case 12:
+ case 17:
+ return true;
+ default:
+ return false;
+ }
+ }]
+>;
+
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern> {
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
//===----------------------------------------------------------------------===//
-// Interpolation Instructions
+// R600 SDNodes
//===----------------------------------------------------------------------===//
-def INTERP: SDNode<"AMDGPUISD::INTERP",
- SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
- >;
+// Shader instruction with two results: $dst0 in R600_TReg32_X and $dst1 in
+// R600_TReg32_Y. It has no selection pattern.
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ // "$dst1" needs the '$' sigil; without it the literal text "dst1" is printed.
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
+ []>;
-def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
- >;
+// Shader instruction with two results: $dst0 in R600_TReg32_Z and $dst1 in
+// R600_TReg32_W. It has no selection pattern.
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ // "$dst1" needs the '$' sigil; without it the literal text "dst1" is printed.
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
+ []>;
-let usesCustomInserter = 1 in {
-def input_perspective : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src0, i32imm:$src1),
- "input_perspective $src0 $src1 : dst",
- [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
-} // End usesCustomInserter = 1
-
-def input_constant : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src),
- "input_perspective $src : dst",
- [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
+>;
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
// Export Instructions
//===----------------------------------------------------------------------===//
-def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
[SDNPHasChain, SDNPSideEffect]>;
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
- def : Pat<(int_R600_store_pixel_dummy),
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
(ExportInst
- (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
>;
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
- 0, 1, 2, 3, cf_inst, 0)
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
>;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
+ >;
+
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf0inst, 0)>;
// Stream1
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
-let isTerminator = 1, usesCustomInserter = 1 in {
+// Export instructions should not be duplicated by the TailDuplication pass
+// (which assumes that duplicable instructions are affected by the exec mask).
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
class ExportSwzInst : InstR600ISA<(
outs),
let Inst{63-32} = Word1;
}
-} // End isTerminator = 1, usesCustomInserter = 1
+} // End usesCustomInserter = 1
class ExportBufInst : InstR600ISA<(
outs),
let Inst{63-32} = Word1;
}
-let Predicates = [isR600toCayman] in {
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 { // Bit layout of Word0, the first 32-bit word mixed into ALU_CLAUSE
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR; // the four fields together fill all 32 bits
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 { // Bit layout of Word1, the second 32-bit word mixed into ALU_CLAUSE
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1; // fields are packed contiguously and fill all 32 bits
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST; // 4-bit opcode field, set from ALU_CLAUSE's `inst` parameter
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), // no outputs; `inst` becomes CF_INST, `OpName` the printed mnemonic
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), // operand names match the CF_ALU_WORD0/WORD1 field names
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 { // empty pattern list; word layouts come from the two mixin classes
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0; // the fields without a same-named operand are fixed here
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+
+ let Inst{31-0} = Word0; // low half of the 64-bit encoding
+ let Inst{63-32} = Word1; // high half of the 64-bit encoding
+}
+
+class CF_WORD0_R600 { // Word0 layout mixed into CF_CLAUSE_R600
+ field bits<32> Word0;
+
+ bits<32> ADDR;
+
+ let Word0 = ADDR; // the whole 32-bit word is the ADDR field
+}
+
+class CF_WORD1_R600 { // Word1 layout mixed into CF_CLAUSE_R600
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<3> COUNT;
+ bits<6> CALL_COUNT;
+ bits<1> COUNT_3;
+ bits<1> END_OF_PROGRAM;
+ bits<1> VALID_PIXEL_MODE;
+ bits<7> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{12-10} = COUNT;
+ let Word1{18-13} = CALL_COUNT;
+ let Word1{19} = COUNT_3; // bit 20 is not assigned by this class
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{22} = VALID_PIXEL_MODE;
+ let Word1{29-23} = CF_INST; // 7-bit opcode field, set from CF_CLAUSE_R600's `inst` parameter
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), // no outputs; inputs and asm text are supplied per def
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { // empty pattern list; word layouts come from the two mixin classes
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1; // defaults below; ADDR, POP_COUNT and COUNT are left for each def (operand or `let`)
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+ let CALL_COUNT = 0;
+ let COUNT_3 = 0;
+ let END_OF_PROGRAM = 0;
+ let WHOLE_QUAD_MODE = 0;
+
+ let Inst{31-0} = Word0; // low half of the 64-bit encoding
+ let Inst{63-32} = Word1; // high half of the 64-bit encoding
+}
+
+class CF_WORD0_EG { // Word0 layout mixed into CF_CLAUSE_EG
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL; // bits 31-27 are not assigned by this class
+}
+
+// Word1 layout mixed into CF_CLAUSE_EG. Bits 19-16 and bit 30 are not
+// assigned by this class.
+class CF_WORD1_EG {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<1> END_OF_PROGRAM;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ // END_OF_PROGRAM was declared (and is set by defs such as CF_END_EG) but was
+ // never placed into Word1, so it had no effect on the encoding. Bit 21 is the
+ // free slot between VALID_PIXEL_MODE and CF_INST.
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), // no outputs; inputs and asm text are supplied per def
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { // empty pattern list; word layouts come from the two mixin classes
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1; // defaults below; ADDR, POP_COUNT and COUNT are left for each def (operand or `let`)
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+ let END_OF_PROGRAM = 0;
+
+ let Inst{31-0} = Word0; // low half of the 64-bit encoding
+ let Inst{63-32} = Word1; // high half of the 64-bit encoding
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { // no operands and no selection pattern
+ field bits<64> Inst; // 64-bit encoding field; no bits are assigned in this def
+}
+
+let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
COND_NE))]
>;
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
>;
def SETGT_INT : R600_2OP <
- 0x3B, "SGT_INT",
+ 0x3B, "SETGT_INT",
[(set (i32 R600_Reg32:$dst),
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;
def TEX_LD : R600_TEX <
0x03, "TEX_LD",
- [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+ imm:$SAMPLER_ID, imm:$textureTarget))]
> {
-let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
-let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
+// TEX_LD takes three extra offset immediates, so it overrides the asm string
+// and input operand list inherited from R600_TEX. The first string piece ends
+// in ", " so the concatenation keeps a space before $RESOURCE_ID like every
+// other separator.
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z, "
+ "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+ i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ i32imm:$textureTarget);
}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
0x04, "TEX_GET_TEXTURE_RESINFO",
- [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_H : R600_TEX <
0x07, "TEX_GET_GRADIENTS_H",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_V : R600_TEX <
0x08, "TEX_GET_GRADIENTS_V",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SET_GRADIENTS_H : R600_TEX <
def TEX_SAMPLE : R600_TEX <
0x10, "TEX_SAMPLE",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C : R600_TEX <
0x18, "TEX_SAMPLE_C",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_L : R600_TEX <
0x11, "TEX_SAMPLE_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_L : R600_TEX <
0x19, "TEX_SAMPLE_C_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_LB : R600_TEX <
0x12, "TEX_SAMPLE_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_LB : R600_TEX <
0x1A, "TEX_SAMPLE_C_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_G : R600_TEX <
class MULADD_Common <bits<5> inst> : R600_3OP <
inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
[(set (f32 R600_Reg32:$dst),
- (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;
def : Pat<
(fdiv R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;
}
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
def CNDE_r600 : CNDE_Common<0x18>;
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt R600_Reg32:$src),
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
def R600_ExportSwz : ExportSwzInst {
- let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 1; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
defm : ExportPattern<R600_ExportSwz, 39>;
def R600_ExportBuf : ExportBufInst {
- let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 1; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+ def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { // CF_INST = 0, no operands
+ let COUNT = 0; // every field with no operand is fixed to a constant
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1; // overrides the class default of 0
+ }
+
}
// Helper pattern for normalizing inputs to trigonometric instructions for
// R700+ cards.
class COS_PAT <InstR600 trig> : Pat<
(fcos R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;
class SIN_PAT <InstR600 trig> : Pat<
(fsin R600_Reg32:$src),
- (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
let Predicates = [isEG] in {
-
+
def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
def : Pat<(fsqrt R600_Reg32:$src),
VecALU
>;
+ def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", []>;
+ defm : BFIPatterns <BFI_INT_eg>;
+
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
[(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
R600_Reg32:$src2))],
>;
def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
defm DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
(FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
def EG_ExportSwz : ExportSwzInst {
- let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 1; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
defm : ExportPattern<EG_ExportSwz, 83>;
def EG_ExportBuf : ExportBufInst {
- let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 1; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+ def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> { // CF_INST = 0, no operands
+ let COUNT = 0; // every field with no operand is fixed to a constant
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1; // overrides the class default of 0
+ }
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
// This is not really necessary, but there were some GPU hangs that appeared
// to be caused by ALU instructions in the next instruction group that wrote
- // to the $ptr registers of the VTX_READ.
+ // to the $ptr registers of the VTX_READ.
// e.g.
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
// %T2_X<def> = MOV %ZERO
[(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
>;
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
}
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
let Predicates = [isCayman] in {
-let isVector = 1 in {
+let isVector = 1 in {
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
def : SIN_PAT <SIN_cm>;
def : COS_PAT <COS_cm>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip R600_Reg32:$src0),
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
- (MOV_IMM_I32 0x4f800000)))
+ (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;
+ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { // CF_INST = 32, no operands; reuses the EG clause layout
+ let ADDR = 0; // every field with no operand is fixed to a constant
+ let POP_COUNT = 0;
+ let COUNT = 0; // END_OF_PROGRAM keeps the CF_CLAUSE_EG default of 0
+ }
def : Pat<(fsqrt R600_Reg32:$src),
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
let FlagOperandIdx = 3;
}
-let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
-
-def JUMP : InstR600 <0x10,
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <0x10,
(outs),
- (ins brtarget:$target, R600_Pred:$p),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
"JUMP $target ($p)",
[], AnyALU
>;
-} // End isTerminator = 1, isBranch = 1, isBarrier = 1
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+ >
+{
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
let usesCustomInserter = 1 in {
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-def R600_LOAD_CONST : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src0),
- "R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
-
-def RESERVE_REG : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$src),
- "RESERVE_REG $src",
- [(int_AMDGPU_reserve_reg imm:$src)]
->;
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
//===---------------------------------------------------------------------===//
// Return instruction (terminator and return; flagged usesCustomInserter, no longer flagged isBarrier)
//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;
}
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction { // codegen-only pseudo: matches CONST_ADDRESS with a constant-offset address and defines an R600_Reg32
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; // ADDRGA_CONST_OFFSET is the SelectGlobalValueConstantOffset complex pattern
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF : // VTX-encoded read matched for CONST_ADDRESS with a variable-offset address and an immediate buffer id
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF: // VTX-encoded read matched for int_R600_load_texbuf with a variable-offset address and an immediate buffer id
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0 {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X = 0;
+let DST_SEL_Y = 1;
+let DST_SEL_Z = 2;
+let DST_SEL_W = 3;
+let DATA_FORMAT = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
+
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//
+// CND*_INT Patterns for f32 True / False values: an i32 is compared against 0 and the selected operands are f32
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
+ R600_Reg32:$src2, cc),
+ (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
+
//CNDGE_INT extra pattern
def : Pat <
(selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
// SGE Reverse args: a COND_LE select of FP_ONE / FP_ZERO is emitted as SGE with src1 and src0 swapped
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args: a COND_LT select of -1 / 0 is emitted as SETGT_DX10 with src1 and src0 swapped
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args: a COND_LE select of -1 / 0 is emitted as SETGE_DX10 with src1 and src0 swapped
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
>;
// SETGT_INT reverse args
(SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;
+//SETE_DX10 - 'true if ordered' (SETO select of -1 / 0 rather than FP_ONE / FP_ZERO)
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
//SNE - 'true if unordered' (SETUO select of FP_ONE / FP_ZERO)
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+//SETNE_DX10 - 'true if unordered' (SETUO select of -1 / 0)
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>; // f32 extract: element index N is paired with subregister index subN (previously sel_x..sel_w)
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>; // f32 insert: same index-to-subN pairing
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>; // i32 extract: same index-to-subN pairing
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>; // i32 insert: same index-to-subN pairing
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; // Vector_Build is replaced by Vector4_Build for both four-element vector types
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns