1 //===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // Instruction format superclass
16 //===----------------------------------------------------------------------===//
18 include "PTXInstrFormats.td"
20 //===----------------------------------------------------------------------===//
21 // Code Generation Predicates
22 //===----------------------------------------------------------------------===//
// Address-width predicates. Both query getSubtarget().use64BitAddresses():
// Use32BitAddresses holds when it returns false, Use64BitAddresses when it
// returns true, so exactly one of the two is satisfied for a given subtarget.
24 def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
25 def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
27 //===----------------------------------------------------------------------===//
28 // Instruction Pattern Stuff
29 //===----------------------------------------------------------------------===//
// Load fragment filtered by address space: takes the LoadSDNode's source
// value and, when that value is pointer-typed, compares the pointer's address
// space against PTX::GLOBAL.
31 def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
33 const PointerType *PT;
34 if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
35 (PT = dyn_cast<PointerType>(Src->getType())))
36 return PT->getAddressSpace() == PTX::GLOBAL;
// Load fragment filtered by address space: takes the LoadSDNode's source
// value and, when that value is pointer-typed, compares the pointer's address
// space against PTX::CONSTANT.
40 def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
42 const PointerType *PT;
43 if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
44 (PT = dyn_cast<PointerType>(Src->getType())))
45 return PT->getAddressSpace() == PTX::CONSTANT;
// Load fragment filtered by address space: takes the LoadSDNode's source
// value and, when that value is pointer-typed, compares the pointer's address
// space against PTX::LOCAL.
49 def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
51 const PointerType *PT;
52 if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
53 (PT = dyn_cast<PointerType>(Src->getType())))
54 return PT->getAddressSpace() == PTX::LOCAL;
// Load fragment filtered by address space: takes the LoadSDNode's source
// value and, when that value is pointer-typed, compares the pointer's address
// space against PTX::PARAMETER.
// NOTE(review): the only use visible in this file is the commented-out LDp
// definition; confirm whether this fragment is still needed.
58 def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
60 const PointerType *PT;
61 if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
62 (PT = dyn_cast<PointerType>(Src->getType())))
63 return PT->getAddressSpace() == PTX::PARAMETER;
// Load fragment filtered by address space: takes the LoadSDNode's source
// value and, when that value is pointer-typed, compares the pointer's address
// space against PTX::SHARED.
67 def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
69 const PointerType *PT;
70 if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
71 (PT = dyn_cast<PointerType>(Src->getType())))
72 return PT->getAddressSpace() == PTX::SHARED;
77 : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
79 const PointerType *PT;
80 if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
81 (PT = dyn_cast<PointerType>(Src->getType())))
82 return PT->getAddressSpace() == PTX::GLOBAL;
87 : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
89 const PointerType *PT;
90 if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
91 (PT = dyn_cast<PointerType>(Src->getType())))
92 return PT->getAddressSpace() == PTX::LOCAL;
97 : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
99 const PointerType *PT;
100 if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
101 (PT = dyn_cast<PointerType>(Src->getType())))
102 return PT->getAddressSpace() == PTX::PARAMETER;
107 : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
109 const PointerType *PT;
110 if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
111 (PT = dyn_cast<PointerType>(Src->getType())))
112 return PT->getAddressSpace() == PTX::SHARED;
// Addressing-mode complex patterns. Each produces two operands and delegates
// matching to the named C++ selector (SelectADDRrr, SelectADDRri or
// SelectADDRii). Every mode is declared twice, once with an i32 and once with
// an i64 value type; the root-node and property lists are left empty.
117 def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
118 def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
119 def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
120 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
121 def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
122 def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
// 32-bit memory operand made of a RRegu32 register plus an i32 immediate;
// printed by printMemOperand.
126 def MEMri32 : Operand<i32> {
127 let PrintMethod = "printMemOperand";
128 let MIOperandInfo = (ops RRegu32, i32imm);
// 64-bit memory operand made of a RRegu64 register plus an i64 immediate;
// printed by printMemOperand.
130 def MEMri64 : Operand<i64> {
131 let PrintMethod = "printMemOperand";
132 let MIOperandInfo = (ops RRegu64, i64imm);
// 32-bit memory operand made of two i32 immediates; printed by
// printMemOperand.
134 def MEMii32 : Operand<i32> {
135 let PrintMethod = "printMemOperand";
136 let MIOperandInfo = (ops i32imm, i32imm);
// 64-bit memory operand made of two i64 immediates; printed by
// printMemOperand.
138 def MEMii64 : Operand<i64> {
139 let PrintMethod = "printMemOperand";
140 let MIOperandInfo = (ops i64imm, i64imm);
142 // The operand here does not correspond to an actual address, so we
143 // can use i32 in 64-bit address modes.
// Parameter operand: a single i32 immediate, printed by printParamOperand
// rather than printMemOperand.
144 def MEMpi : Operand<i32> {
145 let PrintMethod = "printParamOperand";
146 let MIOperandInfo = (ops i32imm);
150 //===----------------------------------------------------------------------===//
151 // PTX Specific Node Definitions
152 //===----------------------------------------------------------------------===//
154 // PTX allows generic 3-register shifts like shl r0, r1, r2
// Shift nodes bound to the generic ISD::SHL / ISD::SRL / ISD::SRA opcodes,
// each using the SDTIntBinOp type profile.
155 def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
156 def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
157 def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
160 : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
162 : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
164 //===----------------------------------------------------------------------===//
165 // Instruction Class Templates
166 //===----------------------------------------------------------------------===//
168 // Three-operand floating-point instruction template
// Parameters:
//   opcstr - mnemonic prefix; ".f32" or ".f64" plus the operand list is
//            appended to form the assembly string.
//   opnode - DAG node matched by the selection pattern.
// Defines four variants: rr32/ri32 operate on RRegf32 and rr64/ri64 on
// RRegf64. The "rr" forms take two registers; the "ri" forms take a register
// and a floating-point immediate (matched with fpimm).
169 multiclass FLOAT3<string opcstr, SDNode opnode> {
170 def rr32 : InstPTX<(outs RRegf32:$d),
171 (ins RRegf32:$a, RRegf32:$b),
172 !strconcat(opcstr, ".f32\t$d, $a, $b"),
173 [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
174 def ri32 : InstPTX<(outs RRegf32:$d),
175 (ins RRegf32:$a, f32imm:$b),
176 !strconcat(opcstr, ".f32\t$d, $a, $b"),
177 [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
178 def rr64 : InstPTX<(outs RRegf64:$d),
179 (ins RRegf64:$a, RRegf64:$b),
180 !strconcat(opcstr, ".f64\t$d, $a, $b"),
181 [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
182 def ri64 : InstPTX<(outs RRegf64:$d),
183 (ins RRegf64:$a, f64imm:$b),
184 !strconcat(opcstr, ".f64\t$d, $a, $b"),
185 [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
// Three-operand integer instruction template.
// Parameters:
//   opcstr - mnemonic prefix; ".u16", ".u32" or ".u64" plus the operand list
//            is appended to form the assembly string.
//   opnode - DAG node matched by the selection pattern.
// Defines register-register ("rr") and register-immediate ("ri") variants for
// each of the 16-, 32- and 64-bit unsigned register classes. The immediate is
// always the second source operand.
188 multiclass INT3<string opcstr, SDNode opnode> {
189 def rr16 : InstPTX<(outs RRegu16:$d),
190 (ins RRegu16:$a, RRegu16:$b),
191 !strconcat(opcstr, ".u16\t$d, $a, $b"),
192 [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
193 def ri16 : InstPTX<(outs RRegu16:$d),
194 (ins RRegu16:$a, i16imm:$b),
195 !strconcat(opcstr, ".u16\t$d, $a, $b"),
196 [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
197 def rr32 : InstPTX<(outs RRegu32:$d),
198 (ins RRegu32:$a, RRegu32:$b),
199 !strconcat(opcstr, ".u32\t$d, $a, $b"),
200 [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
201 def ri32 : InstPTX<(outs RRegu32:$d),
202 (ins RRegu32:$a, i32imm:$b),
203 !strconcat(opcstr, ".u32\t$d, $a, $b"),
204 [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
205 def rr64 : InstPTX<(outs RRegu64:$d),
206 (ins RRegu64:$a, RRegu64:$b),
207 !strconcat(opcstr, ".u64\t$d, $a, $b"),
208 [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
209 def ri64 : InstPTX<(outs RRegu64:$d),
210 (ins RRegu64:$a, i64imm:$b),
211 !strconcat(opcstr, ".u64\t$d, $a, $b"),
212 [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
215 // no %type directive, non-commutable
// Three-operand 32-bit integer template whose assembly string is opcstr
// followed directly by the operand list; no type suffix is appended here, so
// the caller supplies any suffix as part of opcstr.
// Parameters:
//   opcstr - complete mnemonic.
//   opnode - DAG node matched by the selection pattern.
// Defines rr, ri and ir variants on RRegu32. Unlike INT3 there is an "ir"
// form with the immediate as the first source operand, presumably because
// the operands of these operations cannot be swapped.
216 multiclass INT3ntnc<string opcstr, SDNode opnode> {
217 def rr : InstPTX<(outs RRegu32:$d),
218 (ins RRegu32:$a, RRegu32:$b),
219 !strconcat(opcstr, "\t$d, $a, $b"),
220 [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
221 def ri : InstPTX<(outs RRegu32:$d),
222 (ins RRegu32:$a, i32imm:$b),
223 !strconcat(opcstr, "\t$d, $a, $b"),
224 [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
225 def ir : InstPTX<(outs RRegu32:$d),
226 (ins i32imm:$a, RRegu32:$b),
227 !strconcat(opcstr, "\t$d, $a, $b"),
228 [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
// Load instruction template.
// Parameters:
//   opstr    - load mnemonic prefix.
//   typestr  - type suffix appended to opstr.
//   RC       - register class of the destination $d.
//   pat_load - load fragment used in the selection pattern.
// Defines six variants: the rr, ri and ii addressing modes, each in a 32-bit
// and a 64-bit flavor. Every variant matches pat_load applied to the matching
// ADDR* complex pattern, prints as "<opstr><typestr>\t$d, [$a]", and is gated
// on Use32BitAddresses or Use64BitAddresses according to its width.
231 multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
232 def rr32 : InstPTX<(outs RC:$d),
234 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
235 [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
236 def rr64 : InstPTX<(outs RC:$d),
238 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
239 [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
240 def ri32 : InstPTX<(outs RC:$d),
242 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
243 [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
244 def ri64 : InstPTX<(outs RC:$d),
246 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
247 [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
248 def ii32 : InstPTX<(outs RC:$d),
250 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
251 [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
252 def ii64 : InstPTX<(outs RC:$d),
254 !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
255 [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
// Instantiates PTX_LD once per data type (u16, u32, u64, f32, f64), pairing
// each type suffix with its register class and forwarding opstr and pat_load
// unchanged.
258 multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
259 defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
260 defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
261 defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
262 defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
263 defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
// Store instruction template.
// Parameters:
//   opstr     - store mnemonic prefix.
//   typestr   - type suffix appended to opstr.
//   RC        - register class of the stored value $d.
//   pat_store - store fragment used in the selection pattern.
// Defines six variants: the rr, ri and ii addressing modes, each in a 32-bit
// and a 64-bit flavor. No variant has an output operand; each prints as
// "<opstr><typestr>\t[$a], $d" and is gated on Use32BitAddresses or
// Use64BitAddresses according to its width.
// NOTE(review): the rr variants use the MEMri32/MEMri64 operand classes
// together with the ADDRrr patterns; confirm that sharing the register +
// immediate operand layout for register + register addresses is intended.
266 multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
267 def rr32 : InstPTX<(outs),
268 (ins RC:$d, MEMri32:$a),
269 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
270 [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
271 def rr64 : InstPTX<(outs),
272 (ins RC:$d, MEMri64:$a),
273 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
274 [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
275 def ri32 : InstPTX<(outs),
276 (ins RC:$d, MEMri32:$a),
277 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
278 [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
279 def ri64 : InstPTX<(outs),
280 (ins RC:$d, MEMri64:$a),
281 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
282 [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
283 def ii32 : InstPTX<(outs),
284 (ins RC:$d, MEMii32:$a),
285 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
286 [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
287 def ii64 : InstPTX<(outs),
288 (ins RC:$d, MEMii64:$a),
289 !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
290 [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
// Instantiates PTX_ST once per data type (u16, u32, u64, f32, f64), pairing
// each type suffix with its register class and forwarding opstr and pat_store
// unchanged.
293 multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
294 defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
295 defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
296 defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
297 defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
298 defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
301 //===----------------------------------------------------------------------===//
303 //===----------------------------------------------------------------------===//
305 ///===- Floating-Point Arithmetic Instructions ----------------------------===//
// Floating-point add, subtract and multiply, expanded through FLOAT3 into
// f32/f64 register-register and register-immediate forms.
307 defm FADD : FLOAT3<"add", fadd>;
308 defm FSUB : FLOAT3<"sub", fsub>;
309 defm FMUL : FLOAT3<"mul", fmul>;
311 ///===- Integer Arithmetic Instructions -----------------------------------===//
// Integer add and subtract, expanded through INT3 into 16/32/64-bit
// register-register and register-immediate forms.
313 defm ADD : INT3<"add", add>;
314 defm SUB : INT3<"sub", sub>;
316 ///===- Logic and Shift Instructions --------------------------------------===//
// 32-bit shifts, expanded through INT3ntnc into rr, ri and ir forms. The full
// mnemonic, including its type suffix, is passed in because INT3ntnc appends
// none: PTXshl -> shl.b32, PTXsrl -> shr.u32, PTXsra -> shr.s32.
318 defm SHL : INT3ntnc<"shl.b32", PTXshl>;
319 defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
320 defm SRA : INT3ntnc<"shr.s32", PTXsra>;
322 ///===- Data Movement and Conversion Instructions -------------------------===//
// Register-to-register moves, one per register class (Preds, RRegu16,
// RRegu32, RRegu64, RRegf32, RRegf64). They carry an empty selection pattern
// list, so instruction selection never matches them from a DAG pattern, and
// every one is marked neverHasSideEffects.
324 let neverHasSideEffects = 1 in {
326 : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
328 : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
330 : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
332 : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
334 : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
336 : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
// Immediate-to-register moves, one per register class. Each matches setting
// the destination register to an immediate (imm for predicate and integer
// classes, fpimm for floating-point classes). All are marked
// isReMaterializable and isAsCheapAsAMove.
339 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
341 : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
342 [(set Preds:$d, imm:$a)]>;
344 : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
345 [(set RRegu16:$d, imm:$a)]>;
347 : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
348 [(set RRegu32:$d, imm:$a)]>;
350 : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
351 [(set RRegu64:$d, imm:$a)]>;
353 : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
354 [(set RRegf32:$d, fpimm:$a)]>;
356 : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
357 [(set RRegf64:$d, fpimm:$a)]>;
// Loads from the global, constant, local and shared address spaces. Each
// line expands through PTX_LD_ALL into every data type and addressing
// variant, selected by the matching load_* fragment.
361 defm LDg : PTX_LD_ALL<"ld.global", load_global>;
362 defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
363 defm LDl : PTX_LD_ALL<"ld.local", load_local>;
364 defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
366 // This is a special instruction that is manually inserted for kernel parameters
// ld.param loads, one per data type, taking a MEMpi operand. The pattern
// lists are empty, so these are never chosen by pattern matching.
367 def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
368 "ld.param.u16\t$d, [$a]", []>;
369 def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
370 "ld.param.u32\t$d, [$a]", []>;
371 def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
372 "ld.param.u64\t$d, [$a]", []>;
373 def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
374 "ld.param.f32\t$d, [$a]", []>;
375 def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
376 "ld.param.f64\t$d, [$a]", []>;
// Stores to the global, local and shared address spaces. Each line expands
// through PTX_ST_ALL into every data type and addressing variant, selected by
// the matching store_* fragment.
379 defm STg : PTX_ST_ALL<"st.global", store_global>;
380 defm STl : PTX_ST_ALL<"st.local", store_local>;
381 defm STs : PTX_ST_ALL<"st.shared", store_shared>;
383 // defm STp : PTX_ST_ALL<"st.param", store_parameter>;
384 // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
385 // TODO: Do something with st.param if/when it is needed.
387 ///===- Control Flow Instructions -----------------------------------------===//
// Function-terminating instructions. EXIT matches the PTXexit node and RET
// matches the PTXret node; neither takes operands. Both are flagged as
// return, terminator and barrier instructions.
389 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
390 def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
391 def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;