1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Backing storage for the nvptx-fma-level option; the cl::opt declared below
// is bound to this variable through cl::location(FMAContractLevel).
27 unsigned FMAContractLevel = 0;
// Hidden option controlling FMA contraction; the meaning of each level is
// spelled out in the description string.
29 static cl::opt<unsigned, true>
30 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32 " 1: do it 2: do it aggressively"),
33 cl::location(FMAContractLevel),
// Hidden integer option choosing the f32 division variant; it is consulted by
// getDivF32Level().
36 static cl::opt<int> UsePrecDivF32(
37 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
38 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39 " IEEE Compliant F32 div.rnd if avaiable."),
// Hidden option choosing the f32 square-root variant; it is consulted by
// usePrecSqrtF32().
43 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
44 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Hidden option for f32 flush-to-zero; it is consulted by useF32FTZ().
48 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
49 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
53 /// createNVPTXISelDag - This pass converts a legalized DAG into a
54 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// Allocates a new NVPTXDAGToDAGISel constructed from \p TM and \p OptLevel
/// and returns it as a FunctionPass pointer.
55 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56 llvm::CodeGenOpt::Level OptLevel) {
57 return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructor: forwards the target machine and optimization level to
// SelectionDAGISel, caches the NVPTX subtarget, and precomputes the policy
// flags assigned below.
60 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61 CodeGenOpt::Level OptLevel)
62 : SelectionDAGISel(tm, OptLevel),
63 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// FMA for f32/f64 requires optimization to be enabled, subtarget support for
// the corresponding FMA, and an FMA contraction level of at least 1.
65 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
// Stricter counterparts of the two conditions above: the contraction level
// must be exactly 2.
68 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
70 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
// allowFMA depends only on the contraction level, not on OptLevel or the
// subtarget.
72 allowFMA = (FMAContractLevel >= 1);
// doMulWide is enabled whenever optimization is on.
74 doMulWide = (OptLevel > 0);
// Returns the precision level to use for f32 division. An explicit
// nvptx-prec-divf32 on the command line is checked first (via
// getNumOccurrences()); without it the result depends on
// TM.Options.UnsafeFPMath.
77 int NVPTXDAGToDAGISel::getDivF32Level() const {
78 if (UsePrecDivF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-divf32=N is used on the command-line, always honor it
82 // Otherwise, use div.approx if fast math is enabled
83 if (TM.Options.UnsafeFPMath)
// Reports whether the precise f32 square root should be used. When
// nvptx-prec-sqrtf32 was given on the command line its value is returned
// as-is; otherwise the decision depends on TM.Options.UnsafeFPMath.
90 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93 return UsePrecSqrtF32;
95 // Otherwise, use sqrt.approx if fast math is enabled
96 if (TM.Options.UnsafeFPMath)
// Reports whether f32 flush-to-zero should be used for the current function.
// The nvptx-f32ftz command-line option is checked first; after that the
// function's own "nvptx-f32ftz" string attribute is inspected and compared
// against "true".
103 bool NVPTXDAGToDAGISel::useF32FTZ() const {
104 if (FtzEnabled.getNumOccurrences() > 0) {
105 // If nvptx-f32ftz is used on the command-line, always honor it
// MF is the machine function currently being selected; F is its IR function.
108 const Function *F = MF->getFunction();
109 // Otherwise, check for an nvptx-f32ftz attribute on the function
110 if (F->hasFnAttribute("nvptx-f32ftz"))
111 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
113 .getValueAsString() == "true");
119 /// Select - Select instructions not customized! Used for
120 /// expanded, promoted and normal instructions.
///
/// Top-level dispatch for one DAG node: nodes that already carry a machine
/// opcode are left alone; the opcode groups listed in the switch are routed
/// to dedicated Select* helpers whose result is stored in ResNode; the
/// final statement of the function hands the node to SelectCode(N).
121 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
123 if (N->isMachineOpcode()) {
125 return nullptr; // Already selected.
// Result of custom selection, if any of the helpers below produces one.
128 SDNode *ResNode = nullptr;
129 switch (N->getOpcode()) {
// Loads and stores go to SelectLoad / SelectStore.
131 ResNode = SelectLoad(N);
134 ResNode = SelectStore(N);
// NVPTX vector loads.
136 case NVPTXISD::LoadV2:
137 case NVPTXISD::LoadV4:
138 ResNode = SelectLoadVector(N);
// LDG / LDU vector nodes.
140 case NVPTXISD::LDGV2:
141 case NVPTXISD::LDGV4:
142 case NVPTXISD::LDUV2:
143 case NVPTXISD::LDUV4:
144 ResNode = SelectLDGLDU(N);
// NVPTX vector stores.
146 case NVPTXISD::StoreV2:
147 case NVPTXISD::StoreV4:
148 ResNode = SelectStoreVector(N);
// Parameter loads.
150 case NVPTXISD::LoadParam:
151 case NVPTXISD::LoadParamV2:
152 case NVPTXISD::LoadParamV4:
153 ResNode = SelectLoadParam(N);
// Return-value stores.
155 case NVPTXISD::StoreRetval:
156 case NVPTXISD::StoreRetvalV2:
157 case NVPTXISD::StoreRetvalV4:
158 ResNode = SelectStoreRetval(N);
// Parameter stores, including the S32 / U32 forms.
160 case NVPTXISD::StoreParam:
161 case NVPTXISD::StoreParamV2:
162 case NVPTXISD::StoreParamV4:
163 case NVPTXISD::StoreParamS32:
164 case NVPTXISD::StoreParamU32:
165 ResNode = SelectStoreParam(N);
// Intrinsics, split by whether the node carries a chain.
167 case ISD::INTRINSIC_WO_CHAIN:
168 ResNode = SelectIntrinsicNoChain(N);
170 case ISD::INTRINSIC_W_CHAIN:
171 ResNode = SelectIntrinsicChain(N);
// Texture (Tex*) and Tld4* opcodes, in both the plain and the Unified opcode
// families, all share one selector.
173 case NVPTXISD::Tex1DFloatS32:
174 case NVPTXISD::Tex1DFloatFloat:
175 case NVPTXISD::Tex1DFloatFloatLevel:
176 case NVPTXISD::Tex1DFloatFloatGrad:
177 case NVPTXISD::Tex1DS32S32:
178 case NVPTXISD::Tex1DS32Float:
179 case NVPTXISD::Tex1DS32FloatLevel:
180 case NVPTXISD::Tex1DS32FloatGrad:
181 case NVPTXISD::Tex1DU32S32:
182 case NVPTXISD::Tex1DU32Float:
183 case NVPTXISD::Tex1DU32FloatLevel:
184 case NVPTXISD::Tex1DU32FloatGrad:
185 case NVPTXISD::Tex1DArrayFloatS32:
186 case NVPTXISD::Tex1DArrayFloatFloat:
187 case NVPTXISD::Tex1DArrayFloatFloatLevel:
188 case NVPTXISD::Tex1DArrayFloatFloatGrad:
189 case NVPTXISD::Tex1DArrayS32S32:
190 case NVPTXISD::Tex1DArrayS32Float:
191 case NVPTXISD::Tex1DArrayS32FloatLevel:
192 case NVPTXISD::Tex1DArrayS32FloatGrad:
193 case NVPTXISD::Tex1DArrayU32S32:
194 case NVPTXISD::Tex1DArrayU32Float:
195 case NVPTXISD::Tex1DArrayU32FloatLevel:
196 case NVPTXISD::Tex1DArrayU32FloatGrad:
197 case NVPTXISD::Tex2DFloatS32:
198 case NVPTXISD::Tex2DFloatFloat:
199 case NVPTXISD::Tex2DFloatFloatLevel:
200 case NVPTXISD::Tex2DFloatFloatGrad:
201 case NVPTXISD::Tex2DS32S32:
202 case NVPTXISD::Tex2DS32Float:
203 case NVPTXISD::Tex2DS32FloatLevel:
204 case NVPTXISD::Tex2DS32FloatGrad:
205 case NVPTXISD::Tex2DU32S32:
206 case NVPTXISD::Tex2DU32Float:
207 case NVPTXISD::Tex2DU32FloatLevel:
208 case NVPTXISD::Tex2DU32FloatGrad:
209 case NVPTXISD::Tex2DArrayFloatS32:
210 case NVPTXISD::Tex2DArrayFloatFloat:
211 case NVPTXISD::Tex2DArrayFloatFloatLevel:
212 case NVPTXISD::Tex2DArrayFloatFloatGrad:
213 case NVPTXISD::Tex2DArrayS32S32:
214 case NVPTXISD::Tex2DArrayS32Float:
215 case NVPTXISD::Tex2DArrayS32FloatLevel:
216 case NVPTXISD::Tex2DArrayS32FloatGrad:
217 case NVPTXISD::Tex2DArrayU32S32:
218 case NVPTXISD::Tex2DArrayU32Float:
219 case NVPTXISD::Tex2DArrayU32FloatLevel:
220 case NVPTXISD::Tex2DArrayU32FloatGrad:
221 case NVPTXISD::Tex3DFloatS32:
222 case NVPTXISD::Tex3DFloatFloat:
223 case NVPTXISD::Tex3DFloatFloatLevel:
224 case NVPTXISD::Tex3DFloatFloatGrad:
225 case NVPTXISD::Tex3DS32S32:
226 case NVPTXISD::Tex3DS32Float:
227 case NVPTXISD::Tex3DS32FloatLevel:
228 case NVPTXISD::Tex3DS32FloatGrad:
229 case NVPTXISD::Tex3DU32S32:
230 case NVPTXISD::Tex3DU32Float:
231 case NVPTXISD::Tex3DU32FloatLevel:
232 case NVPTXISD::Tex3DU32FloatGrad:
233 case NVPTXISD::TexCubeFloatFloat:
234 case NVPTXISD::TexCubeFloatFloatLevel:
235 case NVPTXISD::TexCubeS32Float:
236 case NVPTXISD::TexCubeS32FloatLevel:
237 case NVPTXISD::TexCubeU32Float:
238 case NVPTXISD::TexCubeU32FloatLevel:
239 case NVPTXISD::TexCubeArrayFloatFloat:
240 case NVPTXISD::TexCubeArrayFloatFloatLevel:
241 case NVPTXISD::TexCubeArrayS32Float:
242 case NVPTXISD::TexCubeArrayS32FloatLevel:
243 case NVPTXISD::TexCubeArrayU32Float:
244 case NVPTXISD::TexCubeArrayU32FloatLevel:
245 case NVPTXISD::Tld4R2DFloatFloat:
246 case NVPTXISD::Tld4G2DFloatFloat:
247 case NVPTXISD::Tld4B2DFloatFloat:
248 case NVPTXISD::Tld4A2DFloatFloat:
249 case NVPTXISD::Tld4R2DS64Float:
250 case NVPTXISD::Tld4G2DS64Float:
251 case NVPTXISD::Tld4B2DS64Float:
252 case NVPTXISD::Tld4A2DS64Float:
253 case NVPTXISD::Tld4R2DU64Float:
254 case NVPTXISD::Tld4G2DU64Float:
255 case NVPTXISD::Tld4B2DU64Float:
256 case NVPTXISD::Tld4A2DU64Float:
257 case NVPTXISD::TexUnified1DFloatS32:
258 case NVPTXISD::TexUnified1DFloatFloat:
259 case NVPTXISD::TexUnified1DFloatFloatLevel:
260 case NVPTXISD::TexUnified1DFloatFloatGrad:
261 case NVPTXISD::TexUnified1DS32S32:
262 case NVPTXISD::TexUnified1DS32Float:
263 case NVPTXISD::TexUnified1DS32FloatLevel:
264 case NVPTXISD::TexUnified1DS32FloatGrad:
265 case NVPTXISD::TexUnified1DU32S32:
266 case NVPTXISD::TexUnified1DU32Float:
267 case NVPTXISD::TexUnified1DU32FloatLevel:
268 case NVPTXISD::TexUnified1DU32FloatGrad:
269 case NVPTXISD::TexUnified1DArrayFloatS32:
270 case NVPTXISD::TexUnified1DArrayFloatFloat:
271 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
272 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
273 case NVPTXISD::TexUnified1DArrayS32S32:
274 case NVPTXISD::TexUnified1DArrayS32Float:
275 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
276 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
277 case NVPTXISD::TexUnified1DArrayU32S32:
278 case NVPTXISD::TexUnified1DArrayU32Float:
279 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
280 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
281 case NVPTXISD::TexUnified2DFloatS32:
282 case NVPTXISD::TexUnified2DFloatFloat:
283 case NVPTXISD::TexUnified2DFloatFloatLevel:
284 case NVPTXISD::TexUnified2DFloatFloatGrad:
285 case NVPTXISD::TexUnified2DS32S32:
286 case NVPTXISD::TexUnified2DS32Float:
287 case NVPTXISD::TexUnified2DS32FloatLevel:
288 case NVPTXISD::TexUnified2DS32FloatGrad:
289 case NVPTXISD::TexUnified2DU32S32:
290 case NVPTXISD::TexUnified2DU32Float:
291 case NVPTXISD::TexUnified2DU32FloatLevel:
292 case NVPTXISD::TexUnified2DU32FloatGrad:
293 case NVPTXISD::TexUnified2DArrayFloatS32:
294 case NVPTXISD::TexUnified2DArrayFloatFloat:
295 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
296 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
297 case NVPTXISD::TexUnified2DArrayS32S32:
298 case NVPTXISD::TexUnified2DArrayS32Float:
299 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
300 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
301 case NVPTXISD::TexUnified2DArrayU32S32:
302 case NVPTXISD::TexUnified2DArrayU32Float:
303 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
304 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
305 case NVPTXISD::TexUnified3DFloatS32:
306 case NVPTXISD::TexUnified3DFloatFloat:
307 case NVPTXISD::TexUnified3DFloatFloatLevel:
308 case NVPTXISD::TexUnified3DFloatFloatGrad:
309 case NVPTXISD::TexUnified3DS32S32:
310 case NVPTXISD::TexUnified3DS32Float:
311 case NVPTXISD::TexUnified3DS32FloatLevel:
312 case NVPTXISD::TexUnified3DS32FloatGrad:
313 case NVPTXISD::TexUnified3DU32S32:
314 case NVPTXISD::TexUnified3DU32Float:
315 case NVPTXISD::TexUnified3DU32FloatLevel:
316 case NVPTXISD::TexUnified3DU32FloatGrad:
317 case NVPTXISD::TexUnifiedCubeFloatFloat:
318 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
319 case NVPTXISD::TexUnifiedCubeS32Float:
320 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
321 case NVPTXISD::TexUnifiedCubeU32Float:
322 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
323 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
324 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
325 case NVPTXISD::TexUnifiedCubeArrayS32Float:
326 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
327 case NVPTXISD::TexUnifiedCubeArrayU32Float:
328 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
329 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
330 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
331 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
332 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
333 case NVPTXISD::Tld4UnifiedR2DS64Float:
334 case NVPTXISD::Tld4UnifiedG2DS64Float:
335 case NVPTXISD::Tld4UnifiedB2DS64Float:
336 case NVPTXISD::Tld4UnifiedA2DS64Float:
337 case NVPTXISD::Tld4UnifiedR2DU64Float:
338 case NVPTXISD::Tld4UnifiedG2DU64Float:
339 case NVPTXISD::Tld4UnifiedB2DU64Float:
340 case NVPTXISD::Tld4UnifiedA2DU64Float:
341 ResNode = SelectTextureIntrinsic(N);
// Suld* opcodes in their Clamp, Trap and Zero variants all share one
// selector.
343 case NVPTXISD::Suld1DI8Clamp:
344 case NVPTXISD::Suld1DI16Clamp:
345 case NVPTXISD::Suld1DI32Clamp:
346 case NVPTXISD::Suld1DI64Clamp:
347 case NVPTXISD::Suld1DV2I8Clamp:
348 case NVPTXISD::Suld1DV2I16Clamp:
349 case NVPTXISD::Suld1DV2I32Clamp:
350 case NVPTXISD::Suld1DV2I64Clamp:
351 case NVPTXISD::Suld1DV4I8Clamp:
352 case NVPTXISD::Suld1DV4I16Clamp:
353 case NVPTXISD::Suld1DV4I32Clamp:
354 case NVPTXISD::Suld1DArrayI8Clamp:
355 case NVPTXISD::Suld1DArrayI16Clamp:
356 case NVPTXISD::Suld1DArrayI32Clamp:
357 case NVPTXISD::Suld1DArrayI64Clamp:
358 case NVPTXISD::Suld1DArrayV2I8Clamp:
359 case NVPTXISD::Suld1DArrayV2I16Clamp:
360 case NVPTXISD::Suld1DArrayV2I32Clamp:
361 case NVPTXISD::Suld1DArrayV2I64Clamp:
362 case NVPTXISD::Suld1DArrayV4I8Clamp:
363 case NVPTXISD::Suld1DArrayV4I16Clamp:
364 case NVPTXISD::Suld1DArrayV4I32Clamp:
365 case NVPTXISD::Suld2DI8Clamp:
366 case NVPTXISD::Suld2DI16Clamp:
367 case NVPTXISD::Suld2DI32Clamp:
368 case NVPTXISD::Suld2DI64Clamp:
369 case NVPTXISD::Suld2DV2I8Clamp:
370 case NVPTXISD::Suld2DV2I16Clamp:
371 case NVPTXISD::Suld2DV2I32Clamp:
372 case NVPTXISD::Suld2DV2I64Clamp:
373 case NVPTXISD::Suld2DV4I8Clamp:
374 case NVPTXISD::Suld2DV4I16Clamp:
375 case NVPTXISD::Suld2DV4I32Clamp:
376 case NVPTXISD::Suld2DArrayI8Clamp:
377 case NVPTXISD::Suld2DArrayI16Clamp:
378 case NVPTXISD::Suld2DArrayI32Clamp:
379 case NVPTXISD::Suld2DArrayI64Clamp:
380 case NVPTXISD::Suld2DArrayV2I8Clamp:
381 case NVPTXISD::Suld2DArrayV2I16Clamp:
382 case NVPTXISD::Suld2DArrayV2I32Clamp:
383 case NVPTXISD::Suld2DArrayV2I64Clamp:
384 case NVPTXISD::Suld2DArrayV4I8Clamp:
385 case NVPTXISD::Suld2DArrayV4I16Clamp:
386 case NVPTXISD::Suld2DArrayV4I32Clamp:
387 case NVPTXISD::Suld3DI8Clamp:
388 case NVPTXISD::Suld3DI16Clamp:
389 case NVPTXISD::Suld3DI32Clamp:
390 case NVPTXISD::Suld3DI64Clamp:
391 case NVPTXISD::Suld3DV2I8Clamp:
392 case NVPTXISD::Suld3DV2I16Clamp:
393 case NVPTXISD::Suld3DV2I32Clamp:
394 case NVPTXISD::Suld3DV2I64Clamp:
395 case NVPTXISD::Suld3DV4I8Clamp:
396 case NVPTXISD::Suld3DV4I16Clamp:
397 case NVPTXISD::Suld3DV4I32Clamp:
398 case NVPTXISD::Suld1DI8Trap:
399 case NVPTXISD::Suld1DI16Trap:
400 case NVPTXISD::Suld1DI32Trap:
401 case NVPTXISD::Suld1DI64Trap:
402 case NVPTXISD::Suld1DV2I8Trap:
403 case NVPTXISD::Suld1DV2I16Trap:
404 case NVPTXISD::Suld1DV2I32Trap:
405 case NVPTXISD::Suld1DV2I64Trap:
406 case NVPTXISD::Suld1DV4I8Trap:
407 case NVPTXISD::Suld1DV4I16Trap:
408 case NVPTXISD::Suld1DV4I32Trap:
409 case NVPTXISD::Suld1DArrayI8Trap:
410 case NVPTXISD::Suld1DArrayI16Trap:
411 case NVPTXISD::Suld1DArrayI32Trap:
412 case NVPTXISD::Suld1DArrayI64Trap:
413 case NVPTXISD::Suld1DArrayV2I8Trap:
414 case NVPTXISD::Suld1DArrayV2I16Trap:
415 case NVPTXISD::Suld1DArrayV2I32Trap:
416 case NVPTXISD::Suld1DArrayV2I64Trap:
417 case NVPTXISD::Suld1DArrayV4I8Trap:
418 case NVPTXISD::Suld1DArrayV4I16Trap:
419 case NVPTXISD::Suld1DArrayV4I32Trap:
420 case NVPTXISD::Suld2DI8Trap:
421 case NVPTXISD::Suld2DI16Trap:
422 case NVPTXISD::Suld2DI32Trap:
423 case NVPTXISD::Suld2DI64Trap:
424 case NVPTXISD::Suld2DV2I8Trap:
425 case NVPTXISD::Suld2DV2I16Trap:
426 case NVPTXISD::Suld2DV2I32Trap:
427 case NVPTXISD::Suld2DV2I64Trap:
428 case NVPTXISD::Suld2DV4I8Trap:
429 case NVPTXISD::Suld2DV4I16Trap:
430 case NVPTXISD::Suld2DV4I32Trap:
431 case NVPTXISD::Suld2DArrayI8Trap:
432 case NVPTXISD::Suld2DArrayI16Trap:
433 case NVPTXISD::Suld2DArrayI32Trap:
434 case NVPTXISD::Suld2DArrayI64Trap:
435 case NVPTXISD::Suld2DArrayV2I8Trap:
436 case NVPTXISD::Suld2DArrayV2I16Trap:
437 case NVPTXISD::Suld2DArrayV2I32Trap:
438 case NVPTXISD::Suld2DArrayV2I64Trap:
439 case NVPTXISD::Suld2DArrayV4I8Trap:
440 case NVPTXISD::Suld2DArrayV4I16Trap:
441 case NVPTXISD::Suld2DArrayV4I32Trap:
442 case NVPTXISD::Suld3DI8Trap:
443 case NVPTXISD::Suld3DI16Trap:
444 case NVPTXISD::Suld3DI32Trap:
445 case NVPTXISD::Suld3DI64Trap:
446 case NVPTXISD::Suld3DV2I8Trap:
447 case NVPTXISD::Suld3DV2I16Trap:
448 case NVPTXISD::Suld3DV2I32Trap:
449 case NVPTXISD::Suld3DV2I64Trap:
450 case NVPTXISD::Suld3DV4I8Trap:
451 case NVPTXISD::Suld3DV4I16Trap:
452 case NVPTXISD::Suld3DV4I32Trap:
453 case NVPTXISD::Suld1DI8Zero:
454 case NVPTXISD::Suld1DI16Zero:
455 case NVPTXISD::Suld1DI32Zero:
456 case NVPTXISD::Suld1DI64Zero:
457 case NVPTXISD::Suld1DV2I8Zero:
458 case NVPTXISD::Suld1DV2I16Zero:
459 case NVPTXISD::Suld1DV2I32Zero:
460 case NVPTXISD::Suld1DV2I64Zero:
461 case NVPTXISD::Suld1DV4I8Zero:
462 case NVPTXISD::Suld1DV4I16Zero:
463 case NVPTXISD::Suld1DV4I32Zero:
464 case NVPTXISD::Suld1DArrayI8Zero:
465 case NVPTXISD::Suld1DArrayI16Zero:
466 case NVPTXISD::Suld1DArrayI32Zero:
467 case NVPTXISD::Suld1DArrayI64Zero:
468 case NVPTXISD::Suld1DArrayV2I8Zero:
469 case NVPTXISD::Suld1DArrayV2I16Zero:
470 case NVPTXISD::Suld1DArrayV2I32Zero:
471 case NVPTXISD::Suld1DArrayV2I64Zero:
472 case NVPTXISD::Suld1DArrayV4I8Zero:
473 case NVPTXISD::Suld1DArrayV4I16Zero:
474 case NVPTXISD::Suld1DArrayV4I32Zero:
475 case NVPTXISD::Suld2DI8Zero:
476 case NVPTXISD::Suld2DI16Zero:
477 case NVPTXISD::Suld2DI32Zero:
478 case NVPTXISD::Suld2DI64Zero:
479 case NVPTXISD::Suld2DV2I8Zero:
480 case NVPTXISD::Suld2DV2I16Zero:
481 case NVPTXISD::Suld2DV2I32Zero:
482 case NVPTXISD::Suld2DV2I64Zero:
483 case NVPTXISD::Suld2DV4I8Zero:
484 case NVPTXISD::Suld2DV4I16Zero:
485 case NVPTXISD::Suld2DV4I32Zero:
486 case NVPTXISD::Suld2DArrayI8Zero:
487 case NVPTXISD::Suld2DArrayI16Zero:
488 case NVPTXISD::Suld2DArrayI32Zero:
489 case NVPTXISD::Suld2DArrayI64Zero:
490 case NVPTXISD::Suld2DArrayV2I8Zero:
491 case NVPTXISD::Suld2DArrayV2I16Zero:
492 case NVPTXISD::Suld2DArrayV2I32Zero:
493 case NVPTXISD::Suld2DArrayV2I64Zero:
494 case NVPTXISD::Suld2DArrayV4I8Zero:
495 case NVPTXISD::Suld2DArrayV4I16Zero:
496 case NVPTXISD::Suld2DArrayV4I32Zero:
497 case NVPTXISD::Suld3DI8Zero:
498 case NVPTXISD::Suld3DI16Zero:
499 case NVPTXISD::Suld3DI32Zero:
500 case NVPTXISD::Suld3DI64Zero:
501 case NVPTXISD::Suld3DV2I8Zero:
502 case NVPTXISD::Suld3DV2I16Zero:
503 case NVPTXISD::Suld3DV2I32Zero:
504 case NVPTXISD::Suld3DV2I64Zero:
505 case NVPTXISD::Suld3DV4I8Zero:
506 case NVPTXISD::Suld3DV4I16Zero:
507 case NVPTXISD::Suld3DV4I32Zero:
508 ResNode = SelectSurfaceIntrinsic(N);
// Handled by SelectBFE.
514 ResNode = SelectBFE(N);
// Address-space casts.
516 case ISD::ADDRSPACECAST:
517 ResNode = SelectAddrSpaceCast(N);
// Final fallback: hand the node to SelectCode.
524 return SelectCode(N);
// Selection for chained intrinsic nodes. The intrinsic ID is read from
// operand 1; the nvvm ldg/ldu global intrinsics (f, i and p forms) are
// forwarded to SelectLDGLDU.
527 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
528 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
532 case Intrinsic::nvvm_ldg_global_f:
533 case Intrinsic::nvvm_ldg_global_i:
534 case Intrinsic::nvvm_ldg_global_p:
535 case Intrinsic::nvvm_ldu_global_f:
536 case Intrinsic::nvvm_ldu_global_i:
537 case Intrinsic::nvvm_ldu_global_p:
538 return SelectLDGLDU(N);
// Maps the memory node's address space onto a PTXLdStInstCode address-space
// code. The IR value attached to the node's memory operand is inspected: when
// its type is a pointer type, the pointer's address space picks the code.
// GENERIC is returned both by the early exit near the top and by the final
// return statement.
542 static unsigned int getCodeAddrSpace(MemSDNode *N,
543 const NVPTXSubtarget &Subtarget) {
544 const Value *Src = N->getMemOperand()->getValue();
547 return NVPTX::PTXLdStInstCode::GENERIC;
549 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
550 switch (PT->getAddressSpace()) {
551 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
552 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
553 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
554 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
555 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
556 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
560 return NVPTX::PTXLdStInstCode::GENERIC;
// Selection for intrinsic nodes without a chain. The intrinsic ID is read
// from operand 0; nvvm_texsurf_handle_internal is forwarded to
// SelectTexSurfHandle.
563 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
564 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
568 case Intrinsic::nvvm_texsurf_handle_internal:
569 return SelectTexSurfHandle(N);
// Builds an NVPTX::texsurf_handles machine node (result type i64) for a
// texture/surface handle intrinsic. Operand 1 of N is a wrapper node whose
// own operand 0 is the wrapped global value.
573 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
574 // Op 0 is the intrinsic ID
575 SDValue Wrapper = N->getOperand(1);
576 SDValue GlobalVal = Wrapper.getOperand(0);
577 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
// Selects a machine node for an ADDRSPACECAST.
//  - Destination is the generic space: a cvta_<space>_yes opcode is chosen
//    from the source space.
//  - Otherwise the source must be generic and a cvta_to_<space>_yes opcode is
//    chosen from the destination space.
// The _64 opcode variants are used when Subtarget.is64Bit(). Address spaces
// outside global/shared/const/local, and casts between two non-generic
// spaces, end in report_fatal_error.
581 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
582 SDValue Src = N->getOperand(0);
583 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
584 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
585 unsigned DstAddrSpace = CastN->getDestAddressSpace();
587 assert(SrcAddrSpace != DstAddrSpace &&
588 "addrspacecast must be between different address spaces");
590 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
591 // Specific to generic
593 switch (SrcAddrSpace) {
594 default: report_fatal_error("Bad address space in addrspacecast");
595 case ADDRESS_SPACE_GLOBAL:
596 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
597 : NVPTX::cvta_global_yes;
599 case ADDRESS_SPACE_SHARED:
600 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
601 : NVPTX::cvta_shared_yes;
603 case ADDRESS_SPACE_CONST:
604 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
605 : NVPTX::cvta_const_yes;
607 case ADDRESS_SPACE_LOCAL:
608 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
609 : NVPTX::cvta_local_yes;
// The new node has the same result type as N and takes the source pointer as
// its only operand.
612 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
614 // Generic to specific
// NOTE(review): the literal 0 is presumably the numeric value of
// ADDRESS_SPACE_GENERIC - confirm against the enum definition.
615 if (SrcAddrSpace != 0)
616 report_fatal_error("Cannot cast between two non-generic address spaces");
618 switch (DstAddrSpace) {
619 default: report_fatal_error("Bad address space in addrspacecast");
620 case ADDRESS_SPACE_GLOBAL:
621 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
622 : NVPTX::cvta_to_global_yes;
624 case ADDRESS_SPACE_SHARED:
625 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
626 : NVPTX::cvta_to_shared_yes;
628 case ADDRESS_SPACE_CONST:
629 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
630 : NVPTX::cvta_to_const_yes;
632 case ADDRESS_SPACE_LOCAL:
633 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
634 : NVPTX::cvta_to_local_yes;
637 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// Selects a machine load for a LoadSDNode.
//
// The function gathers the instruction modifiers (volatile flag, address-space
// code, vector arity, source type and source width), then picks an
// LD_<type>_<mode> opcode according to which addressing form matches the
// address operand:
//   SelectDirectAddr           -> _avar
//   SelectADDRsi / ADDRsi64    -> _asi
//   SelectADDRri / ADDRri64    -> _ari  (with _64 variants on 64-bit targets)
//   none of the above          -> _areg (with _64 variants on 64-bit targets)
// The memory operand of the original node is attached to the new machine
// node at the end.
641 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
643 LoadSDNode *LD = cast<LoadSDNode>(N);
644 EVT LoadedVT = LD->getMemoryVT();
645 SDNode *NVPTXLD = nullptr;
647 // do not support pre/post inc/dec
// Only simple memory value types are handled.
651 if (!LoadedVT.isSimple())
654 // Address Space Setting
655 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
658 // - .volatile is only available for .global and .shared
659 bool isVolatile = LD->isVolatile();
660 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
661 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
662 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity modifier: Scalar by default, V2 / V4 for vector types.
666 MVT SimpleVT = LoadedVT.getSimpleVT();
667 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
668 if (SimpleVT.isVector()) {
669 unsigned num = SimpleVT.getVectorNumElements();
671 vecType = NVPTX::PTXLdStInstCode::V2;
673 vecType = NVPTX::PTXLdStInstCode::V4;
678 // Type Setting: fromType + fromTypeWidth
680 // Sign : ISD::SEXTLOAD
681 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
683 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
684 MVT ScalarVT = SimpleVT.getScalarType();
685 // Read at least 8 bits (predicates are stored as 8-bit values)
686 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
687 unsigned int fromType;
688 if ((LD->getExtensionType() == ISD::SEXTLOAD))
689 fromType = NVPTX::PTXLdStInstCode::Signed;
690 else if (ScalarVT.isFloatingPoint())
691 fromType = NVPTX::PTXLdStInstCode::Float;
693 fromType = NVPTX::PTXLdStInstCode::Unsigned;
695 // Create the machine instruction DAG
696 SDValue Chain = N->getOperand(0);
697 SDValue N1 = N->getOperand(1);
699 SDValue Offset, Base;
// The opcode is keyed on the type of the load's first result.
701 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address (_avar opcodes); operands carry Addr.
703 if (SelectDirectAddr(N1, Addr)) {
706 Opcode = NVPTX::LD_i8_avar;
709 Opcode = NVPTX::LD_i16_avar;
712 Opcode = NVPTX::LD_i32_avar;
715 Opcode = NVPTX::LD_i64_avar;
718 Opcode = NVPTX::LD_f32_avar;
721 Opcode = NVPTX::LD_f64_avar;
726 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
727 getI32Imm(vecType), getI32Imm(fromType),
728 getI32Imm(fromTypeWidth), Addr, Chain };
729 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: ADDRsi match (_asi opcodes); operands carry Base and Offset.
730 } else if (Subtarget.is64Bit()
731 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
732 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
735 Opcode = NVPTX::LD_i8_asi;
738 Opcode = NVPTX::LD_i16_asi;
741 Opcode = NVPTX::LD_i32_asi;
744 Opcode = NVPTX::LD_i64_asi;
747 Opcode = NVPTX::LD_f32_asi;
750 Opcode = NVPTX::LD_f64_asi;
755 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
756 getI32Imm(vecType), getI32Imm(fromType),
757 getI32Imm(fromTypeWidth), Base, Offset, Chain };
758 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: ADDRri match (_ari opcodes, _64 variants on 64-bit targets).
759 } else if (Subtarget.is64Bit()
760 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
761 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
762 if (Subtarget.is64Bit()) {
765 Opcode = NVPTX::LD_i8_ari_64;
768 Opcode = NVPTX::LD_i16_ari_64;
771 Opcode = NVPTX::LD_i32_ari_64;
774 Opcode = NVPTX::LD_i64_ari_64;
777 Opcode = NVPTX::LD_f32_ari_64;
780 Opcode = NVPTX::LD_f64_ari_64;
788 Opcode = NVPTX::LD_i8_ari;
791 Opcode = NVPTX::LD_i16_ari;
794 Opcode = NVPTX::LD_i32_ari;
797 Opcode = NVPTX::LD_i64_ari;
800 Opcode = NVPTX::LD_f32_ari;
803 Opcode = NVPTX::LD_f64_ari;
809 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
810 getI32Imm(vecType), getI32Imm(fromType),
811 getI32Imm(fromTypeWidth), Base, Offset, Chain };
812 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4: _areg opcodes; the address operand N1 is passed through unchanged.
814 if (Subtarget.is64Bit()) {
817 Opcode = NVPTX::LD_i8_areg_64;
820 Opcode = NVPTX::LD_i16_areg_64;
823 Opcode = NVPTX::LD_i32_areg_64;
826 Opcode = NVPTX::LD_i64_areg_64;
829 Opcode = NVPTX::LD_f32_areg_64;
832 Opcode = NVPTX::LD_f64_areg_64;
840 Opcode = NVPTX::LD_i8_areg;
843 Opcode = NVPTX::LD_i16_areg;
846 Opcode = NVPTX::LD_i32_areg;
849 Opcode = NVPTX::LD_i64_areg;
852 Opcode = NVPTX::LD_f32_areg;
855 Opcode = NVPTX::LD_f64_areg;
861 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
862 getI32Imm(vecType), getI32Imm(fromType),
863 getI32Imm(fromTypeWidth), N1, Chain };
864 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Copy the original node's memory operand onto the new machine node.
868 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
869 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
870 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects a machine vector load for NVPTXISD::LoadV2 / LoadV4 nodes.
//
// Mirrors the scalar load selection: the modifiers (volatile flag,
// address-space code, vector arity, source type and width) are gathered
// first, then an LDV_<type>_<v2|v4>_<mode> opcode is chosen from the node
// opcode, the element type and the addressing form that matches:
//   SelectDirectAddr           -> _avar
//   SelectADDRsi / ADDRsi64    -> _asi
//   SelectADDRri / ADDRri64    -> _ari  (with _64 variants on 64-bit targets)
//   none of the above          -> _areg (with _64 variants on 64-bit targets)
// In the V4 lists below only i8, i16, i32 and f32 opcodes appear.
876 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
878 SDValue Chain = N->getOperand(0);
879 SDValue Op1 = N->getOperand(1);
880 SDValue Addr, Offset, Base;
884 MemSDNode *MemSD = cast<MemSDNode>(N);
885 EVT LoadedVT = MemSD->getMemoryVT();
// Only simple memory value types are handled.
887 if (!LoadedVT.isSimple())
890 // Address Space Setting
891 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
894 // - .volatile is only available for .global and .shared
895 bool IsVolatile = MemSD->isVolatile();
896 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
897 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
898 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
902 MVT SimpleVT = LoadedVT.getSimpleVT();
904 // Type Setting: fromType + fromTypeWidth
906 // Sign : ISD::SEXTLOAD
907 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
909 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
910 MVT ScalarVT = SimpleVT.getScalarType();
911 // Read at least 8 bits (predicates are stored as 8-bit values)
912 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
913 unsigned int FromType;
914 // The last operand holds the original LoadSDNode::getExtensionType() value
915 unsigned ExtensionType = cast<ConstantSDNode>(
916 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
917 if (ExtensionType == ISD::SEXTLOAD)
918 FromType = NVPTX::PTXLdStInstCode::Signed;
919 else if (ScalarVT.isFloatingPoint())
920 FromType = NVPTX::PTXLdStInstCode::Float;
922 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity modifier follows the node opcode.
926 switch (N->getOpcode()) {
927 case NVPTXISD::LoadV2:
928 VecType = NVPTX::PTXLdStInstCode::V2;
930 case NVPTXISD::LoadV4:
931 VecType = NVPTX::PTXLdStInstCode::V4;
// The opcode is keyed on the type of the node's first result.
937 EVT EltVT = N->getValueType(0);
// Form 1: direct address (_avar opcodes); operands carry Addr.
939 if (SelectDirectAddr(Op1, Addr)) {
940 switch (N->getOpcode()) {
943 case NVPTXISD::LoadV2:
944 switch (EltVT.getSimpleVT().SimpleTy) {
948 Opcode = NVPTX::LDV_i8_v2_avar;
951 Opcode = NVPTX::LDV_i16_v2_avar;
954 Opcode = NVPTX::LDV_i32_v2_avar;
957 Opcode = NVPTX::LDV_i64_v2_avar;
960 Opcode = NVPTX::LDV_f32_v2_avar;
963 Opcode = NVPTX::LDV_f64_v2_avar;
967 case NVPTXISD::LoadV4:
968 switch (EltVT.getSimpleVT().SimpleTy) {
972 Opcode = NVPTX::LDV_i8_v4_avar;
975 Opcode = NVPTX::LDV_i16_v4_avar;
978 Opcode = NVPTX::LDV_i32_v4_avar;
981 Opcode = NVPTX::LDV_f32_v4_avar;
987 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
988 getI32Imm(VecType), getI32Imm(FromType),
989 getI32Imm(FromTypeWidth), Addr, Chain };
990 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: ADDRsi match (_asi opcodes); operands carry Base and Offset.
991 } else if (Subtarget.is64Bit()
992 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
993 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
994 switch (N->getOpcode()) {
997 case NVPTXISD::LoadV2:
998 switch (EltVT.getSimpleVT().SimpleTy) {
1002 Opcode = NVPTX::LDV_i8_v2_asi;
1005 Opcode = NVPTX::LDV_i16_v2_asi;
1008 Opcode = NVPTX::LDV_i32_v2_asi;
1011 Opcode = NVPTX::LDV_i64_v2_asi;
1014 Opcode = NVPTX::LDV_f32_v2_asi;
1017 Opcode = NVPTX::LDV_f64_v2_asi;
1021 case NVPTXISD::LoadV4:
1022 switch (EltVT.getSimpleVT().SimpleTy) {
1026 Opcode = NVPTX::LDV_i8_v4_asi;
1029 Opcode = NVPTX::LDV_i16_v4_asi;
1032 Opcode = NVPTX::LDV_i32_v4_asi;
1035 Opcode = NVPTX::LDV_f32_v4_asi;
1041 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1042 getI32Imm(VecType), getI32Imm(FromType),
1043 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1044 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: ADDRri match (_ari opcodes, _64 variants on 64-bit targets).
1045 } else if (Subtarget.is64Bit()
1046 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1047 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1048 if (Subtarget.is64Bit()) {
1049 switch (N->getOpcode()) {
1052 case NVPTXISD::LoadV2:
1053 switch (EltVT.getSimpleVT().SimpleTy) {
1057 Opcode = NVPTX::LDV_i8_v2_ari_64;
1060 Opcode = NVPTX::LDV_i16_v2_ari_64;
1063 Opcode = NVPTX::LDV_i32_v2_ari_64;
1066 Opcode = NVPTX::LDV_i64_v2_ari_64;
1069 Opcode = NVPTX::LDV_f32_v2_ari_64;
1072 Opcode = NVPTX::LDV_f64_v2_ari_64;
1076 case NVPTXISD::LoadV4:
1077 switch (EltVT.getSimpleVT().SimpleTy) {
1081 Opcode = NVPTX::LDV_i8_v4_ari_64;
1084 Opcode = NVPTX::LDV_i16_v4_ari_64;
1087 Opcode = NVPTX::LDV_i32_v4_ari_64;
1090 Opcode = NVPTX::LDV_f32_v4_ari_64;
1096 switch (N->getOpcode()) {
1099 case NVPTXISD::LoadV2:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
1104 Opcode = NVPTX::LDV_i8_v2_ari;
1107 Opcode = NVPTX::LDV_i16_v2_ari;
1110 Opcode = NVPTX::LDV_i32_v2_ari;
1113 Opcode = NVPTX::LDV_i64_v2_ari;
1116 Opcode = NVPTX::LDV_f32_v2_ari;
1119 Opcode = NVPTX::LDV_f64_v2_ari;
1123 case NVPTXISD::LoadV4:
1124 switch (EltVT.getSimpleVT().SimpleTy) {
1128 Opcode = NVPTX::LDV_i8_v4_ari;
1131 Opcode = NVPTX::LDV_i16_v4_ari;
1134 Opcode = NVPTX::LDV_i32_v4_ari;
1137 Opcode = NVPTX::LDV_f32_v4_ari;
1144 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1145 getI32Imm(VecType), getI32Imm(FromType),
1146 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1148 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4: _areg opcodes; the address operand Op1 is passed through unchanged.
1150 if (Subtarget.is64Bit()) {
1151 switch (N->getOpcode()) {
1154 case NVPTXISD::LoadV2:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1159 Opcode = NVPTX::LDV_i8_v2_areg_64;
1162 Opcode = NVPTX::LDV_i16_v2_areg_64;
1165 Opcode = NVPTX::LDV_i32_v2_areg_64;
1168 Opcode = NVPTX::LDV_i64_v2_areg_64;
1171 Opcode = NVPTX::LDV_f32_v2_areg_64;
1174 Opcode = NVPTX::LDV_f64_v2_areg_64;
1178 case NVPTXISD::LoadV4:
1179 switch (EltVT.getSimpleVT().SimpleTy) {
1183 Opcode = NVPTX::LDV_i8_v4_areg_64;
1186 Opcode = NVPTX::LDV_i16_v4_areg_64;
1189 Opcode = NVPTX::LDV_i32_v4_areg_64;
1192 Opcode = NVPTX::LDV_f32_v4_areg_64;
1198 switch (N->getOpcode()) {
1201 case NVPTXISD::LoadV2:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1206 Opcode = NVPTX::LDV_i8_v2_areg;
1209 Opcode = NVPTX::LDV_i16_v2_areg;
1212 Opcode = NVPTX::LDV_i32_v2_areg;
1215 Opcode = NVPTX::LDV_i64_v2_areg;
1218 Opcode = NVPTX::LDV_f32_v2_areg;
1221 Opcode = NVPTX::LDV_f64_v2_areg;
1225 case NVPTXISD::LoadV4:
1226 switch (EltVT.getSimpleVT().SimpleTy) {
1230 Opcode = NVPTX::LDV_i8_v4_areg;
1233 Opcode = NVPTX::LDV_i16_v4_areg;
1236 Opcode = NVPTX::LDV_i32_v4_areg;
1239 Opcode = NVPTX::LDV_f32_v4_areg;
1246 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1247 getI32Imm(VecType), getI32Imm(FromType),
1248 getI32Imm(FromTypeWidth), Op1, Chain };
1249 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Copy the original node's memory operand onto the new machine node.
1252 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1253 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1254 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLDGLDU - Build the machine node for an LDG/LDU load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_{f,i,p} / nvvm_ldu_global_{f,i,p} intrinsics, or one of the
/// NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4 vector nodes.
///
/// The opcode is chosen from three things:
///   - the address form: direct address (*avar), base + offset (*ari*), or the
///     address operand passed through as-is (*areg*);
///   - the pointer width reported by Subtarget.is64Bit();
///   - the scalar element type of the node's memory VT.
/// The memory operand of the original node is attached to the new machine node.
1259 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1261 SDValue Chain = N->getOperand(0);
1266 // If this is an LDG/LDU intrinsic, the address is the third operand. If it's an
1267 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1268 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1269 Op1 = N->getOperand(2);
1270 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an intrinsic node holds the intrinsic ID.
1271 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1275 case Intrinsic::nvvm_ldg_global_f:
1276 case Intrinsic::nvvm_ldg_global_i:
1277 case Intrinsic::nvvm_ldg_global_p:
1280 case Intrinsic::nvvm_ldu_global_f:
1281 case Intrinsic::nvvm_ldu_global_i:
1282 case Intrinsic::nvvm_ldu_global_p:
1287 Op1 = N->getOperand(1);
1288 Mem = cast<MemSDNode>(N);
1294 SDValue Base, Offset, Addr;
// Opcode tables are keyed by scalar type, so reduce a vector memory VT to
// its element type.
1296 EVT EltVT = Mem->getMemoryVT();
1297 if (EltVT.isVector()) {
1298 EltVT = EltVT.getVectorElementType();
// Address form 1: SelectDirectAddr matched. The *avar opcodes take
// { Addr, Chain } as operands.
1301 if (SelectDirectAddr(Op1, Addr)) {
1302 switch (N->getOpcode()) {
1305 case ISD::INTRINSIC_W_CHAIN:
// Scalar LDG opcodes.
1307 switch (EltVT.getSimpleVT().SimpleTy) {
1311 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1314 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1317 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1320 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1323 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1326 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
// Scalar LDU opcodes.
1330 switch (EltVT.getSimpleVT().SimpleTy) {
1334 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1337 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1340 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1343 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1346 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1349 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1354 case NVPTXISD::LDGV2:
1355 switch (EltVT.getSimpleVT().SimpleTy) {
1359 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1362 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1365 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1368 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1371 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1374 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1378 case NVPTXISD::LDUV2:
1379 switch (EltVT.getSimpleVT().SimpleTy) {
1383 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1386 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1389 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1392 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1395 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1398 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
// The four-element tables below list only i8, i16, i32 and f32 opcodes.
1402 case NVPTXISD::LDGV4:
1403 switch (EltVT.getSimpleVT().SimpleTy) {
1407 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1410 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1413 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1416 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1420 case NVPTXISD::LDUV4:
1421 switch (EltVT.getSimpleVT().SimpleTy) {
1425 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1428 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1431 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1434 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1440 SDValue Ops[] = { Addr, Chain };
1441 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Address form 2: base + offset. The matcher and the opcode family both
// depend on the pointer width (*ari64 vs. *ari / *ari32); the operands are
// { Base, Offset, Chain }.
1442 } else if (Subtarget.is64Bit()
1443 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1444 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1445 if (Subtarget.is64Bit()) {
1446 switch (N->getOpcode()) {
1449 case ISD::INTRINSIC_W_CHAIN:
1451 switch (EltVT.getSimpleVT().SimpleTy) {
1455 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1458 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1461 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1464 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1467 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1470 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1474 switch (EltVT.getSimpleVT().SimpleTy) {
1478 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1481 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1484 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1487 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1490 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1493 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1498 case NVPTXISD::LDGV2:
1499 switch (EltVT.getSimpleVT().SimpleTy) {
1503 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1506 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1509 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1512 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1515 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1518 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1522 case NVPTXISD::LDUV2:
1523 switch (EltVT.getSimpleVT().SimpleTy) {
1527 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1530 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1533 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1536 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1539 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1542 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1546 case NVPTXISD::LDGV4:
1547 switch (EltVT.getSimpleVT().SimpleTy) {
1551 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1554 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1557 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1560 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1564 case NVPTXISD::LDUV4:
1565 switch (EltVT.getSimpleVT().SimpleTy) {
1569 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1572 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1575 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1578 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
// 32-bit pointer variants of the base + offset form.
1584 switch (N->getOpcode()) {
1587 case ISD::INTRINSIC_W_CHAIN:
1589 switch (EltVT.getSimpleVT().SimpleTy) {
1593 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1596 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1599 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1602 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1605 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1608 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1612 switch (EltVT.getSimpleVT().SimpleTy) {
1616 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1619 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1622 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1625 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1628 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1631 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1636 case NVPTXISD::LDGV2:
1637 switch (EltVT.getSimpleVT().SimpleTy) {
1641 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1644 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1647 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1650 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1653 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1656 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1660 case NVPTXISD::LDUV2:
1661 switch (EltVT.getSimpleVT().SimpleTy) {
1665 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1668 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1671 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1674 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1677 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1680 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1684 case NVPTXISD::LDGV4:
1685 switch (EltVT.getSimpleVT().SimpleTy) {
1689 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1692 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1695 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1698 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1702 case NVPTXISD::LDUV4:
1703 switch (EltVT.getSimpleVT().SimpleTy) {
1707 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1710 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1713 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1716 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1723 SDValue Ops[] = { Base, Offset, Chain };
1725 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Address form 3 (fallback): neither matcher above succeeded, so the address
// operand Op1 is used unchanged with the *areg64 / *areg / *areg32 opcodes.
1727 if (Subtarget.is64Bit()) {
1728 switch (N->getOpcode()) {
1731 case ISD::INTRINSIC_W_CHAIN:
1733 switch (EltVT.getSimpleVT().SimpleTy) {
1737 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1740 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1743 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1746 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1749 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1752 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1756 switch (EltVT.getSimpleVT().SimpleTy) {
1760 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1763 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1766 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1769 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1772 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1775 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1780 case NVPTXISD::LDGV2:
1781 switch (EltVT.getSimpleVT().SimpleTy) {
1785 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1788 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1791 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1794 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1797 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1800 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1804 case NVPTXISD::LDUV2:
1805 switch (EltVT.getSimpleVT().SimpleTy) {
1809 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1812 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1815 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1818 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1821 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1824 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1828 case NVPTXISD::LDGV4:
1829 switch (EltVT.getSimpleVT().SimpleTy) {
1833 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1836 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1839 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1842 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1846 case NVPTXISD::LDUV4:
1847 switch (EltVT.getSimpleVT().SimpleTy) {
1851 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1854 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1857 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1860 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
// 32-bit pointer variants of the pass-through form.
1866 switch (N->getOpcode()) {
1869 case ISD::INTRINSIC_W_CHAIN:
1871 switch (EltVT.getSimpleVT().SimpleTy) {
1875 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1878 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1881 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1884 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1887 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1890 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1894 switch (EltVT.getSimpleVT().SimpleTy) {
1898 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1901 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1904 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1907 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1910 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1913 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1918 case NVPTXISD::LDGV2:
1919 switch (EltVT.getSimpleVT().SimpleTy) {
1923 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1926 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1929 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1932 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1935 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1938 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1942 case NVPTXISD::LDUV2:
1943 switch (EltVT.getSimpleVT().SimpleTy) {
1947 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1950 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1953 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1956 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1959 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1962 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1966 case NVPTXISD::LDGV4:
1967 switch (EltVT.getSimpleVT().SimpleTy) {
1971 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1974 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1977 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1980 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1984 case NVPTXISD::LDUV4:
1985 switch (EltVT.getSimpleVT().SimpleTy) {
1989 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1992 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1995 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1998 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2005 SDValue Ops[] = { Op1, Chain };
2006 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Copy the memory operand of the original node onto the machine node.
2009 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2010 MemRefs0[0] = Mem->getMemOperand();
2011 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStore - Build the machine node for a StoreSDNode.
///
/// Every ST_* instruction built here takes, in order: the operand N1, five
/// i32 immediates (volatile flag, code address space, vector type, value
/// kind, value width in bits), the address operand(s), and the chain.
///
/// The opcode family depends on the address form that matches N2:
///   - *_avar  : SelectDirectAddr matched ({ Addr });
///   - *_asi   : SelectADDRsi / SelectADDRsi64 matched ({ Base, Offset });
///   - *_ari   : SelectADDRri / SelectADDRri64 matched ({ Base, Offset }),
///               with separate *_ari_64 opcodes on 64-bit targets;
///   - *_areg  : fallback, N2 is used unchanged, with separate *_areg_64
///               opcodes on 64-bit targets.
/// The memory operand of \p N is attached to the resulting machine node.
2016 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2018 StoreSDNode *ST = cast<StoreSDNode>(N);
2019 EVT StoreVT = ST->getMemoryVT();
2020 SDNode *NVPTXST = nullptr;
2022 // do not support pre/post inc/dec
2023 if (ST->isIndexed())
// Stores whose memory VT is not a simple type are also special-cased here.
2026 if (!StoreVT.isSimple())
2029 // Address Space Setting
2030 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
2033 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below tests for address spaces other than
// GLOBAL, SHARED *and* GENERIC, so GENERIC is treated like the two spaces
// named in the comment above.
2034 bool isVolatile = ST->isVolatile();
2035 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2036 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2037 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector Setting: starts as Scalar; V2 / V4 are assigned for vector memory
// VTs based on the element count.
2041 MVT SimpleVT = StoreVT.getSimpleVT();
2042 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2043 if (SimpleVT.isVector()) {
2044 unsigned num = SimpleVT.getVectorNumElements();
2046 vecType = NVPTX::PTXLdStInstCode::V2;
2048 vecType = NVPTX::PTXLdStInstCode::V4;
2053 // Type Setting: toType + toTypeWidth
2054 // - for integer type, always use 'u'
2056 MVT ScalarVT = SimpleVT.getScalarType();
2057 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2058 unsigned int toType;
2059 if (ScalarVT.isFloatingPoint())
2060 toType = NVPTX::PTXLdStInstCode::Float;
2062 toType = NVPTX::PTXLdStInstCode::Unsigned;
2064 // Create the machine instruction DAG
2065 SDValue Chain = N->getOperand(0);
2066 SDValue N1 = N->getOperand(1);
2067 SDValue N2 = N->getOperand(2);
2069 SDValue Offset, Base;
// Simple value type of N1 (operand 1 of the store node).
2071 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Address form 1: direct address.
2073 if (SelectDirectAddr(N2, Addr)) {
2076 Opcode = NVPTX::ST_i8_avar;
2079 Opcode = NVPTX::ST_i16_avar;
2082 Opcode = NVPTX::ST_i32_avar;
2085 Opcode = NVPTX::ST_i64_avar;
2088 Opcode = NVPTX::ST_f32_avar;
2091 Opcode = NVPTX::ST_f64_avar;
2096 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2097 getI32Imm(vecType), getI32Imm(toType),
2098 getI32Imm(toTypeWidth), Addr, Chain };
2099 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Address form 2: the "si" matcher. The same *_asi opcodes are used for
// both pointer widths; only the matcher differs.
2100 } else if (Subtarget.is64Bit()
2101 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2102 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2105 Opcode = NVPTX::ST_i8_asi;
2108 Opcode = NVPTX::ST_i16_asi;
2111 Opcode = NVPTX::ST_i32_asi;
2114 Opcode = NVPTX::ST_i64_asi;
2117 Opcode = NVPTX::ST_f32_asi;
2120 Opcode = NVPTX::ST_f64_asi;
2125 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2126 getI32Imm(vecType), getI32Imm(toType),
2127 getI32Imm(toTypeWidth), Base, Offset, Chain };
2128 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Address form 3: the "ri" matcher, with distinct opcodes per pointer width.
2129 } else if (Subtarget.is64Bit()
2130 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2131 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2132 if (Subtarget.is64Bit()) {
2135 Opcode = NVPTX::ST_i8_ari_64;
2138 Opcode = NVPTX::ST_i16_ari_64;
2141 Opcode = NVPTX::ST_i32_ari_64;
2144 Opcode = NVPTX::ST_i64_ari_64;
2147 Opcode = NVPTX::ST_f32_ari_64;
2150 Opcode = NVPTX::ST_f64_ari_64;
2158 Opcode = NVPTX::ST_i8_ari;
2161 Opcode = NVPTX::ST_i16_ari;
2164 Opcode = NVPTX::ST_i32_ari;
2167 Opcode = NVPTX::ST_i64_ari;
2170 Opcode = NVPTX::ST_f32_ari;
2173 Opcode = NVPTX::ST_f64_ari;
2179 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2180 getI32Imm(vecType), getI32Imm(toType),
2181 getI32Imm(toTypeWidth), Base, Offset, Chain };
2182 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Address form 4 (fallback): N2 is passed through unchanged as the address.
2184 if (Subtarget.is64Bit()) {
2187 Opcode = NVPTX::ST_i8_areg_64;
2190 Opcode = NVPTX::ST_i16_areg_64;
2193 Opcode = NVPTX::ST_i32_areg_64;
2196 Opcode = NVPTX::ST_i64_areg_64;
2199 Opcode = NVPTX::ST_f32_areg_64;
2202 Opcode = NVPTX::ST_f64_areg_64;
2210 Opcode = NVPTX::ST_i8_areg;
2213 Opcode = NVPTX::ST_i16_areg;
2216 Opcode = NVPTX::ST_i32_areg;
2219 Opcode = NVPTX::ST_i64_areg;
2222 Opcode = NVPTX::ST_f32_areg;
2225 Opcode = NVPTX::ST_f64_areg;
2231 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2232 getI32Imm(vecType), getI32Imm(toType),
2233 getI32Imm(toTypeWidth), N2, Chain };
2234 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Copy the memory operand of the original node onto the machine node.
2238 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2239 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2240 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreVector - Build the machine node for an NVPTXISD::StoreV2 or
/// NVPTXISD::StoreV4 node.
///
/// The operand list StOps is assembled in this order:
///   1. the two (StoreV2) or four (StoreV4) operands starting at operand 1;
///   2. five i32 immediates: volatile flag, code address space, vector type,
///      value kind (Float / Unsigned) and value width in bits;
///   3. the address operand(s) for the matched address form;
///   4. the chain.
/// The opcode is chosen from the node opcode, the value type of operand 1,
/// the address form (*_avar, *_asi, *_ari, *_areg) and, for the last two
/// forms, the pointer width. A store into the CONSTANT address space is
/// reported through report_fatal_error. The memory operand of \p N is
/// attached to the resulting machine node.
2246 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2247 SDValue Chain = N->getOperand(0);
2248 SDValue Op1 = N->getOperand(1);
2249 SDValue Addr, Offset, Base;
// The opcode switches below key on the value type of operand 1, while the
// type/width immediates are derived from the memory VT (StoreVT).
2253 EVT EltVT = Op1.getValueType();
2254 MemSDNode *MemSD = cast<MemSDNode>(N);
2255 EVT StoreVT = MemSD->getMemoryVT();
2257 // Address Space Setting
2258 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2260 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2261 report_fatal_error("Cannot store to pointer that points to constant "
2266 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below tests for address spaces other than
// GLOBAL, SHARED *and* GENERIC, so GENERIC is treated like the two spaces
// named in the comment above.
2267 bool IsVolatile = MemSD->isVolatile();
2268 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2269 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2270 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2273 // Type Setting: toType + toTypeWidth
2274 // - for integer type, always use 'u'
2275 assert(StoreVT.isSimple() && "Store value is not simple");
2276 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2277 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2279 if (ScalarVT.isFloatingPoint())
2280 ToType = NVPTX::PTXLdStInstCode::Float;
2282 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2284 SmallVector<SDValue, 12> StOps;
// Collect the operands that precede the address and remember the address
// operand (N2), which directly follows them.
2288 switch (N->getOpcode()) {
2289 case NVPTXISD::StoreV2:
2290 VecType = NVPTX::PTXLdStInstCode::V2;
2291 StOps.push_back(N->getOperand(1));
2292 StOps.push_back(N->getOperand(2));
2293 N2 = N->getOperand(3);
2295 case NVPTXISD::StoreV4:
2296 VecType = NVPTX::PTXLdStInstCode::V4;
2297 StOps.push_back(N->getOperand(1));
2298 StOps.push_back(N->getOperand(2));
2299 StOps.push_back(N->getOperand(3));
2300 StOps.push_back(N->getOperand(4));
2301 N2 = N->getOperand(5);
2307 StOps.push_back(getI32Imm(IsVolatile));
2308 StOps.push_back(getI32Imm(CodeAddrSpace));
2309 StOps.push_back(getI32Imm(VecType));
2310 StOps.push_back(getI32Imm(ToType));
2311 StOps.push_back(getI32Imm(ToTypeWidth));
// Address form 1: direct address.
2313 if (SelectDirectAddr(N2, Addr)) {
2314 switch (N->getOpcode()) {
2317 case NVPTXISD::StoreV2:
2318 switch (EltVT.getSimpleVT().SimpleTy) {
2322 Opcode = NVPTX::STV_i8_v2_avar;
2325 Opcode = NVPTX::STV_i16_v2_avar;
2328 Opcode = NVPTX::STV_i32_v2_avar;
2331 Opcode = NVPTX::STV_i64_v2_avar;
2334 Opcode = NVPTX::STV_f32_v2_avar;
2337 Opcode = NVPTX::STV_f64_v2_avar;
// The StoreV4 tables list only i8, i16, i32 and f32 opcodes.
2341 case NVPTXISD::StoreV4:
2342 switch (EltVT.getSimpleVT().SimpleTy) {
2346 Opcode = NVPTX::STV_i8_v4_avar;
2349 Opcode = NVPTX::STV_i16_v4_avar;
2352 Opcode = NVPTX::STV_i32_v4_avar;
2355 Opcode = NVPTX::STV_f32_v4_avar;
2360 StOps.push_back(Addr);
// Address form 2: the "si" matcher; the same *_asi opcodes serve both
// pointer widths.
2361 } else if (Subtarget.is64Bit()
2362 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2363 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2364 switch (N->getOpcode()) {
2367 case NVPTXISD::StoreV2:
2368 switch (EltVT.getSimpleVT().SimpleTy) {
2372 Opcode = NVPTX::STV_i8_v2_asi;
2375 Opcode = NVPTX::STV_i16_v2_asi;
2378 Opcode = NVPTX::STV_i32_v2_asi;
2381 Opcode = NVPTX::STV_i64_v2_asi;
2384 Opcode = NVPTX::STV_f32_v2_asi;
2387 Opcode = NVPTX::STV_f64_v2_asi;
2391 case NVPTXISD::StoreV4:
2392 switch (EltVT.getSimpleVT().SimpleTy) {
2396 Opcode = NVPTX::STV_i8_v4_asi;
2399 Opcode = NVPTX::STV_i16_v4_asi;
2402 Opcode = NVPTX::STV_i32_v4_asi;
2405 Opcode = NVPTX::STV_f32_v4_asi;
2410 StOps.push_back(Base);
2411 StOps.push_back(Offset);
// Address form 3: the "ri" matcher, with distinct opcodes per pointer width.
2412 } else if (Subtarget.is64Bit()
2413 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2414 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2415 if (Subtarget.is64Bit()) {
2416 switch (N->getOpcode()) {
2419 case NVPTXISD::StoreV2:
2420 switch (EltVT.getSimpleVT().SimpleTy) {
2424 Opcode = NVPTX::STV_i8_v2_ari_64;
2427 Opcode = NVPTX::STV_i16_v2_ari_64;
2430 Opcode = NVPTX::STV_i32_v2_ari_64;
2433 Opcode = NVPTX::STV_i64_v2_ari_64;
2436 Opcode = NVPTX::STV_f32_v2_ari_64;
2439 Opcode = NVPTX::STV_f64_v2_ari_64;
2443 case NVPTXISD::StoreV4:
2444 switch (EltVT.getSimpleVT().SimpleTy) {
2448 Opcode = NVPTX::STV_i8_v4_ari_64;
2451 Opcode = NVPTX::STV_i16_v4_ari_64;
2454 Opcode = NVPTX::STV_i32_v4_ari_64;
2457 Opcode = NVPTX::STV_f32_v4_ari_64;
2463 switch (N->getOpcode()) {
2466 case NVPTXISD::StoreV2:
2467 switch (EltVT.getSimpleVT().SimpleTy) {
2471 Opcode = NVPTX::STV_i8_v2_ari;
2474 Opcode = NVPTX::STV_i16_v2_ari;
2477 Opcode = NVPTX::STV_i32_v2_ari;
2480 Opcode = NVPTX::STV_i64_v2_ari;
2483 Opcode = NVPTX::STV_f32_v2_ari;
2486 Opcode = NVPTX::STV_f64_v2_ari;
2490 case NVPTXISD::StoreV4:
2491 switch (EltVT.getSimpleVT().SimpleTy) {
2495 Opcode = NVPTX::STV_i8_v4_ari;
2498 Opcode = NVPTX::STV_i16_v4_ari;
2501 Opcode = NVPTX::STV_i32_v4_ari;
2504 Opcode = NVPTX::STV_f32_v4_ari;
2510 StOps.push_back(Base);
2511 StOps.push_back(Offset);
// Address form 4 (fallback): N2 is passed through unchanged as the address.
2513 if (Subtarget.is64Bit()) {
2514 switch (N->getOpcode()) {
2517 case NVPTXISD::StoreV2:
2518 switch (EltVT.getSimpleVT().SimpleTy) {
2522 Opcode = NVPTX::STV_i8_v2_areg_64;
2525 Opcode = NVPTX::STV_i16_v2_areg_64;
2528 Opcode = NVPTX::STV_i32_v2_areg_64;
2531 Opcode = NVPTX::STV_i64_v2_areg_64;
2534 Opcode = NVPTX::STV_f32_v2_areg_64;
2537 Opcode = NVPTX::STV_f64_v2_areg_64;
2541 case NVPTXISD::StoreV4:
2542 switch (EltVT.getSimpleVT().SimpleTy) {
2546 Opcode = NVPTX::STV_i8_v4_areg_64;
2549 Opcode = NVPTX::STV_i16_v4_areg_64;
2552 Opcode = NVPTX::STV_i32_v4_areg_64;
2555 Opcode = NVPTX::STV_f32_v4_areg_64;
2561 switch (N->getOpcode()) {
2564 case NVPTXISD::StoreV2:
2565 switch (EltVT.getSimpleVT().SimpleTy) {
2569 Opcode = NVPTX::STV_i8_v2_areg;
2572 Opcode = NVPTX::STV_i16_v2_areg;
2575 Opcode = NVPTX::STV_i32_v2_areg;
2578 Opcode = NVPTX::STV_i64_v2_areg;
2581 Opcode = NVPTX::STV_f32_v2_areg;
2584 Opcode = NVPTX::STV_f64_v2_areg;
2588 case NVPTXISD::StoreV4:
2589 switch (EltVT.getSimpleVT().SimpleTy) {
2593 Opcode = NVPTX::STV_i8_v4_areg;
2596 Opcode = NVPTX::STV_i16_v4_areg;
2599 Opcode = NVPTX::STV_i32_v4_areg;
2602 Opcode = NVPTX::STV_f32_v4_areg;
2608 StOps.push_back(N2);
// The chain is always the last operand.
2611 StOps.push_back(Chain);
2613 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Copy the memory operand of the original node onto the machine node.
2615 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2616 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2617 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLoadParam - Build the machine node for an NVPTXISD::LoadParam,
/// LoadParamV2 or LoadParamV4 node.
///
/// The opcode (LoadParamMem*, LoadParamMemV2*, LoadParamMemV4*) is chosen
/// from the node's memory VT. The machine node produces one, two or four
/// values of type EltVT (the type of result 0 of \p Node), followed by
/// MVT::Other and MVT::Glue. Its operands are the offset (operand 2,
/// re-emitted as an i32 target constant), the chain (operand 0) and the
/// flag (operand 3).
2622 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2623 SDValue Chain = Node->getOperand(0);
2624 SDValue Offset = Node->getOperand(2);
2625 SDValue Flag = Node->getOperand(3);
2627 MemSDNode *Mem = cast<MemSDNode>(Node);
2630 switch (Node->getOpcode()) {
2633 case NVPTXISD::LoadParam:
2636 case NVPTXISD::LoadParamV2:
2639 case NVPTXISD::LoadParamV4:
2644 EVT EltVT = Node->getValueType(0);
2645 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes. Two memory types share the I8 opcode (presumably i1 and
// i8 -- confirm against the case labels).
2653 switch (MemVT.getSimpleVT().SimpleTy) {
2657 Opc = NVPTX::LoadParamMemI8;
2660 Opc = NVPTX::LoadParamMemI8;
2663 Opc = NVPTX::LoadParamMemI16;
2666 Opc = NVPTX::LoadParamMemI32;
2669 Opc = NVPTX::LoadParamMemI64;
2672 Opc = NVPTX::LoadParamMemF32;
2675 Opc = NVPTX::LoadParamMemF64;
// Two-element opcodes.
2680 switch (MemVT.getSimpleVT().SimpleTy) {
2684 Opc = NVPTX::LoadParamMemV2I8;
2687 Opc = NVPTX::LoadParamMemV2I8;
2690 Opc = NVPTX::LoadParamMemV2I16;
2693 Opc = NVPTX::LoadParamMemV2I32;
2696 Opc = NVPTX::LoadParamMemV2I64;
2699 Opc = NVPTX::LoadParamMemV2F32;
2702 Opc = NVPTX::LoadParamMemV2F64;
// Four-element opcodes; only I8, I16, I32 and F32 variants are listed.
2707 switch (MemVT.getSimpleVT().SimpleTy) {
2711 Opc = NVPTX::LoadParamMemV4I8;
2714 Opc = NVPTX::LoadParamMemV4I8;
2717 Opc = NVPTX::LoadParamMemV4I16;
2720 Opc = NVPTX::LoadParamMemV4I32;
2723 Opc = NVPTX::LoadParamMemV4F32;
// Result types: one EltVT per loaded element, then chain and glue.
2731 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2732 } else if (VecSize == 2) {
2733 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2735 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2736 VTs = CurDAG->getVTList(EVTs);
2739 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
// NOTE(review): three operands are pushed into a SmallVector declared with
// an inline capacity of 2.
2741 SmallVector<SDValue, 2> Ops;
2742 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2743 Ops.push_back(Chain);
2744 Ops.push_back(Flag);
2747 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// SelectStoreRetval - Build the machine node for an NVPTXISD::StoreRetval,
/// StoreRetvalV2 or StoreRetvalV4 node.
///
/// Operand layout of \p N as read here: operand 0 is the chain, operand 1 is
/// a constant offset, and the NumElts operands starting at operand 2 are
/// forwarded to the machine node. The machine node's operands are those
/// forwarded operands, then the offset as an i32 target constant, then the
/// chain. The opcode is chosen from the element count and the node's memory
/// VT, and the node's memory operand is attached to the result.
2751 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2753 SDValue Chain = N->getOperand(0);
2754 SDValue Offset = N->getOperand(1);
2755 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2756 MemSDNode *Mem = cast<MemSDNode>(N);
2758 // How many elements do we have?
2759 unsigned NumElts = 1;
2760 switch (N->getOpcode()) {
2763 case NVPTXISD::StoreRetval:
2766 case NVPTXISD::StoreRetvalV2:
2769 case NVPTXISD::StoreRetvalV4:
2774 // Build vector of operands
2775 SmallVector<SDValue, 6> Ops;
2776 for (unsigned i = 0; i < NumElts; ++i)
2777 Ops.push_back(N->getOperand(i + 2));
2778 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2779 Ops.push_back(Chain);
2781 // Determine target opcode
2782 // If we have an i1, use an 8-bit store. The lowering code in
2783 // NVPTXISelLowering will have already emitted an upcast.
2784 unsigned Opcode = 0;
// Scalar opcodes (the I8 opcode is assigned for two memory types).
2789 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2793 Opcode = NVPTX::StoreRetvalI8;
2796 Opcode = NVPTX::StoreRetvalI8;
2799 Opcode = NVPTX::StoreRetvalI16;
2802 Opcode = NVPTX::StoreRetvalI32;
2805 Opcode = NVPTX::StoreRetvalI64;
2808 Opcode = NVPTX::StoreRetvalF32;
2811 Opcode = NVPTX::StoreRetvalF64;
// Two-element opcodes.
2816 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2820 Opcode = NVPTX::StoreRetvalV2I8;
2823 Opcode = NVPTX::StoreRetvalV2I8;
2826 Opcode = NVPTX::StoreRetvalV2I16;
2829 Opcode = NVPTX::StoreRetvalV2I32;
2832 Opcode = NVPTX::StoreRetvalV2I64;
2835 Opcode = NVPTX::StoreRetvalV2F32;
2838 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element opcodes; only I8, I16, I32 and F32 variants are listed.
2843 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2847 Opcode = NVPTX::StoreRetvalV4I8;
2850 Opcode = NVPTX::StoreRetvalV4I8;
2853 Opcode = NVPTX::StoreRetvalV4I16;
2856 Opcode = NVPTX::StoreRetvalV4I32;
2859 Opcode = NVPTX::StoreRetvalV4F32;
2866 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Copy the memory operand of the original node onto the machine node.
2867 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2868 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2869 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreParam - Build the machine node for an NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// Operand layout of \p N as read here: operand 0 is the chain, operand 1 a
/// constant parameter index, operand 2 a constant offset, the NumElts
/// operands starting at operand 3 are forwarded, and the last operand is the
/// flag. The machine node's operands are the forwarded operands, the
/// parameter index and offset as i32 target constants, the chain and the
/// flag; it produces MVT::Other and MVT::Glue.
///
/// For StoreParamU32 / StoreParamS32 the first forwarded operand is first
/// run through a CVT_u32_u16 / CVT_s32_s16 machine node and the store uses
/// StoreParamI32. The memory operand of \p N is attached to the result.
2874 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2876 SDValue Chain = N->getOperand(0);
2877 SDValue Param = N->getOperand(1);
2878 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2879 SDValue Offset = N->getOperand(2);
2880 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2881 MemSDNode *Mem = cast<MemSDNode>(N);
2882 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2884 // How many elements do we have?
2885 unsigned NumElts = 1;
2886 switch (N->getOpcode()) {
2889 case NVPTXISD::StoreParamU32:
2890 case NVPTXISD::StoreParamS32:
2891 case NVPTXISD::StoreParam:
2894 case NVPTXISD::StoreParamV2:
2897 case NVPTXISD::StoreParamV4:
2902 // Build vector of operands
2903 SmallVector<SDValue, 8> Ops;
2904 for (unsigned i = 0; i < NumElts; ++i)
2905 Ops.push_back(N->getOperand(i + 3));
2906 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2907 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2908 Ops.push_back(Chain);
2909 Ops.push_back(Flag);
2911 // Determine target opcode
2912 // If we have an i1, use an 8-bit store. The lowering code in
2913 // NVPTXISelLowering will have already emitted an upcast.
2914 unsigned Opcode = 0;
2915 switch (N->getOpcode()) {
// Scalar opcodes (the I8 opcode is assigned for two memory types).
2921 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2925 Opcode = NVPTX::StoreParamI8;
2928 Opcode = NVPTX::StoreParamI8;
2931 Opcode = NVPTX::StoreParamI16;
2934 Opcode = NVPTX::StoreParamI32;
2937 Opcode = NVPTX::StoreParamI64;
2940 Opcode = NVPTX::StoreParamF32;
2943 Opcode = NVPTX::StoreParamF64;
// Two-element opcodes.
2948 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2952 Opcode = NVPTX::StoreParamV2I8;
2955 Opcode = NVPTX::StoreParamV2I8;
2958 Opcode = NVPTX::StoreParamV2I16;
2961 Opcode = NVPTX::StoreParamV2I32;
2964 Opcode = NVPTX::StoreParamV2I64;
2967 Opcode = NVPTX::StoreParamV2F32;
2970 Opcode = NVPTX::StoreParamV2F64;
// Four-element opcodes; only I8, I16, I32 and F32 variants are listed.
2975 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2979 Opcode = NVPTX::StoreParamV4I8;
2982 Opcode = NVPTX::StoreParamV4I8;
2985 Opcode = NVPTX::StoreParamV4I16;
2988 Opcode = NVPTX::StoreParamV4I32;
2991 Opcode = NVPTX::StoreParamV4F32;
2997 // Special case: if we have a sign-extend/zero-extend node, insert the
2998 // conversion instruction first, and use that as the value operand to
2999 // the selected StoreParam node.
3000 case NVPTXISD::StoreParamU32: {
3001 Opcode = NVPTX::StoreParamI32;
3002 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
3004 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3005 MVT::i32, Ops[0], CvtNone);
// Replace the value operand with the converted i32 value.
3006 Ops[0] = SDValue(Cvt, 0);
3009 case NVPTXISD::StoreParamS32: {
3010 Opcode = NVPTX::StoreParamI32;
3011 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
3013 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3014 MVT::i32, Ops[0], CvtNone);
// Replace the value operand with the converted i32 value.
3015 Ops[0] = SDValue(Cvt, 0);
3020 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3022 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Copy the memory operand of the original node onto the machine node.
3023 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3024 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3025 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3030 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3031 SDValue Chain = N->getOperand(0);
3032 SDNode *Ret = nullptr;
3034 SmallVector<SDValue, 8> Ops;
3036 switch (N->getOpcode()) {
3037 default: return nullptr;
3038 case NVPTXISD::Tex1DFloatS32:
3039 Opc = NVPTX::TEX_1D_F32_S32;
3041 case NVPTXISD::Tex1DFloatFloat:
3042 Opc = NVPTX::TEX_1D_F32_F32;
3044 case NVPTXISD::Tex1DFloatFloatLevel:
3045 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3047 case NVPTXISD::Tex1DFloatFloatGrad:
3048 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3050 case NVPTXISD::Tex1DS32S32:
3051 Opc = NVPTX::TEX_1D_S32_S32;
3053 case NVPTXISD::Tex1DS32Float:
3054 Opc = NVPTX::TEX_1D_S32_F32;
3056 case NVPTXISD::Tex1DS32FloatLevel:
3057 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3059 case NVPTXISD::Tex1DS32FloatGrad:
3060 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3062 case NVPTXISD::Tex1DU32S32:
3063 Opc = NVPTX::TEX_1D_U32_S32;
3065 case NVPTXISD::Tex1DU32Float:
3066 Opc = NVPTX::TEX_1D_U32_F32;
3068 case NVPTXISD::Tex1DU32FloatLevel:
3069 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3071 case NVPTXISD::Tex1DU32FloatGrad:
3072 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3074 case NVPTXISD::Tex1DArrayFloatS32:
3075 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3077 case NVPTXISD::Tex1DArrayFloatFloat:
3078 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3080 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3081 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3083 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3084 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3086 case NVPTXISD::Tex1DArrayS32S32:
3087 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3089 case NVPTXISD::Tex1DArrayS32Float:
3090 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3092 case NVPTXISD::Tex1DArrayS32FloatLevel:
3093 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3095 case NVPTXISD::Tex1DArrayS32FloatGrad:
3096 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3098 case NVPTXISD::Tex1DArrayU32S32:
3099 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3101 case NVPTXISD::Tex1DArrayU32Float:
3102 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3104 case NVPTXISD::Tex1DArrayU32FloatLevel:
3105 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3107 case NVPTXISD::Tex1DArrayU32FloatGrad:
3108 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3110 case NVPTXISD::Tex2DFloatS32:
3111 Opc = NVPTX::TEX_2D_F32_S32;
3113 case NVPTXISD::Tex2DFloatFloat:
3114 Opc = NVPTX::TEX_2D_F32_F32;
3116 case NVPTXISD::Tex2DFloatFloatLevel:
3117 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3119 case NVPTXISD::Tex2DFloatFloatGrad:
3120 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3122 case NVPTXISD::Tex2DS32S32:
3123 Opc = NVPTX::TEX_2D_S32_S32;
3125 case NVPTXISD::Tex2DS32Float:
3126 Opc = NVPTX::TEX_2D_S32_F32;
3128 case NVPTXISD::Tex2DS32FloatLevel:
3129 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3131 case NVPTXISD::Tex2DS32FloatGrad:
3132 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3134 case NVPTXISD::Tex2DU32S32:
3135 Opc = NVPTX::TEX_2D_U32_S32;
3137 case NVPTXISD::Tex2DU32Float:
3138 Opc = NVPTX::TEX_2D_U32_F32;
3140 case NVPTXISD::Tex2DU32FloatLevel:
3141 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3143 case NVPTXISD::Tex2DU32FloatGrad:
3144 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3146 case NVPTXISD::Tex2DArrayFloatS32:
3147 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3149 case NVPTXISD::Tex2DArrayFloatFloat:
3150 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3152 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3153 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3155 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3156 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3158 case NVPTXISD::Tex2DArrayS32S32:
3159 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3161 case NVPTXISD::Tex2DArrayS32Float:
3162 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3164 case NVPTXISD::Tex2DArrayS32FloatLevel:
3165 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3167 case NVPTXISD::Tex2DArrayS32FloatGrad:
3168 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3170 case NVPTXISD::Tex2DArrayU32S32:
3171 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3173 case NVPTXISD::Tex2DArrayU32Float:
3174 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3176 case NVPTXISD::Tex2DArrayU32FloatLevel:
3177 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3179 case NVPTXISD::Tex2DArrayU32FloatGrad:
3180 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3182 case NVPTXISD::Tex3DFloatS32:
3183 Opc = NVPTX::TEX_3D_F32_S32;
3185 case NVPTXISD::Tex3DFloatFloat:
3186 Opc = NVPTX::TEX_3D_F32_F32;
3188 case NVPTXISD::Tex3DFloatFloatLevel:
3189 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3191 case NVPTXISD::Tex3DFloatFloatGrad:
3192 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3194 case NVPTXISD::Tex3DS32S32:
3195 Opc = NVPTX::TEX_3D_S32_S32;
3197 case NVPTXISD::Tex3DS32Float:
3198 Opc = NVPTX::TEX_3D_S32_F32;
3200 case NVPTXISD::Tex3DS32FloatLevel:
3201 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3203 case NVPTXISD::Tex3DS32FloatGrad:
3204 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3206 case NVPTXISD::Tex3DU32S32:
3207 Opc = NVPTX::TEX_3D_U32_S32;
3209 case NVPTXISD::Tex3DU32Float:
3210 Opc = NVPTX::TEX_3D_U32_F32;
3212 case NVPTXISD::Tex3DU32FloatLevel:
3213 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3215 case NVPTXISD::Tex3DU32FloatGrad:
3216 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3218 case NVPTXISD::TexCubeFloatFloat:
3219 Opc = NVPTX::TEX_CUBE_F32_F32;
3221 case NVPTXISD::TexCubeFloatFloatLevel:
3222 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3224 case NVPTXISD::TexCubeS32Float:
3225 Opc = NVPTX::TEX_CUBE_S32_F32;
3227 case NVPTXISD::TexCubeS32FloatLevel:
3228 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3230 case NVPTXISD::TexCubeU32Float:
3231 Opc = NVPTX::TEX_CUBE_U32_F32;
3233 case NVPTXISD::TexCubeU32FloatLevel:
3234 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3236 case NVPTXISD::TexCubeArrayFloatFloat:
3237 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3239 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3240 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3242 case NVPTXISD::TexCubeArrayS32Float:
3243 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3245 case NVPTXISD::TexCubeArrayS32FloatLevel:
3246 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3248 case NVPTXISD::TexCubeArrayU32Float:
3249 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3251 case NVPTXISD::TexCubeArrayU32FloatLevel:
3252 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3254 case NVPTXISD::Tld4R2DFloatFloat:
3255 Opc = NVPTX::TLD4_R_2D_F32_F32;
3257 case NVPTXISD::Tld4G2DFloatFloat:
3258 Opc = NVPTX::TLD4_G_2D_F32_F32;
3260 case NVPTXISD::Tld4B2DFloatFloat:
3261 Opc = NVPTX::TLD4_B_2D_F32_F32;
3263 case NVPTXISD::Tld4A2DFloatFloat:
3264 Opc = NVPTX::TLD4_A_2D_F32_F32;
3266 case NVPTXISD::Tld4R2DS64Float:
3267 Opc = NVPTX::TLD4_R_2D_S32_F32;
3269 case NVPTXISD::Tld4G2DS64Float:
3270 Opc = NVPTX::TLD4_G_2D_S32_F32;
3272 case NVPTXISD::Tld4B2DS64Float:
3273 Opc = NVPTX::TLD4_B_2D_S32_F32;
3275 case NVPTXISD::Tld4A2DS64Float:
3276 Opc = NVPTX::TLD4_A_2D_S32_F32;
3278 case NVPTXISD::Tld4R2DU64Float:
3279 Opc = NVPTX::TLD4_R_2D_U32_F32;
3281 case NVPTXISD::Tld4G2DU64Float:
3282 Opc = NVPTX::TLD4_G_2D_U32_F32;
3284 case NVPTXISD::Tld4B2DU64Float:
3285 Opc = NVPTX::TLD4_B_2D_U32_F32;
3287 case NVPTXISD::Tld4A2DU64Float:
3288 Opc = NVPTX::TLD4_A_2D_U32_F32;
3290 case NVPTXISD::TexUnified1DFloatS32:
3291 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3293 case NVPTXISD::TexUnified1DFloatFloat:
3294 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3296 case NVPTXISD::TexUnified1DFloatFloatLevel:
3297 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3299 case NVPTXISD::TexUnified1DFloatFloatGrad:
3300 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3302 case NVPTXISD::TexUnified1DS32S32:
3303 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3305 case NVPTXISD::TexUnified1DS32Float:
3306 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3308 case NVPTXISD::TexUnified1DS32FloatLevel:
3309 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3311 case NVPTXISD::TexUnified1DS32FloatGrad:
3312 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3314 case NVPTXISD::TexUnified1DU32S32:
3315 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3317 case NVPTXISD::TexUnified1DU32Float:
3318 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3320 case NVPTXISD::TexUnified1DU32FloatLevel:
3321 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3323 case NVPTXISD::TexUnified1DU32FloatGrad:
3324 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3326 case NVPTXISD::TexUnified1DArrayFloatS32:
3327 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3329 case NVPTXISD::TexUnified1DArrayFloatFloat:
3330 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3332 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3333 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3335 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3336 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3338 case NVPTXISD::TexUnified1DArrayS32S32:
3339 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3341 case NVPTXISD::TexUnified1DArrayS32Float:
3342 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3344 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3345 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3347 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3348 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3350 case NVPTXISD::TexUnified1DArrayU32S32:
3351 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3353 case NVPTXISD::TexUnified1DArrayU32Float:
3354 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3356 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3357 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3359 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3360 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3362 case NVPTXISD::TexUnified2DFloatS32:
3363 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3365 case NVPTXISD::TexUnified2DFloatFloat:
3366 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3368 case NVPTXISD::TexUnified2DFloatFloatLevel:
3369 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3371 case NVPTXISD::TexUnified2DFloatFloatGrad:
3372 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3374 case NVPTXISD::TexUnified2DS32S32:
3375 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3377 case NVPTXISD::TexUnified2DS32Float:
3378 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3380 case NVPTXISD::TexUnified2DS32FloatLevel:
3381 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3383 case NVPTXISD::TexUnified2DS32FloatGrad:
3384 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3386 case NVPTXISD::TexUnified2DU32S32:
3387 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3389 case NVPTXISD::TexUnified2DU32Float:
3390 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3392 case NVPTXISD::TexUnified2DU32FloatLevel:
3393 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3395 case NVPTXISD::TexUnified2DU32FloatGrad:
3396 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3398 case NVPTXISD::TexUnified2DArrayFloatS32:
3399 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3401 case NVPTXISD::TexUnified2DArrayFloatFloat:
3402 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3404 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3405 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3407 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3408 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3410 case NVPTXISD::TexUnified2DArrayS32S32:
3411 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3413 case NVPTXISD::TexUnified2DArrayS32Float:
3414 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3416 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3417 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3419 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3420 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3422 case NVPTXISD::TexUnified2DArrayU32S32:
3423 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3425 case NVPTXISD::TexUnified2DArrayU32Float:
3426 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3428 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3429 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3431 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3432 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3434 case NVPTXISD::TexUnified3DFloatS32:
3435 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3437 case NVPTXISD::TexUnified3DFloatFloat:
3438 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3440 case NVPTXISD::TexUnified3DFloatFloatLevel:
3441 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3443 case NVPTXISD::TexUnified3DFloatFloatGrad:
3444 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3446 case NVPTXISD::TexUnified3DS32S32:
3447 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3449 case NVPTXISD::TexUnified3DS32Float:
3450 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3452 case NVPTXISD::TexUnified3DS32FloatLevel:
3453 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3455 case NVPTXISD::TexUnified3DS32FloatGrad:
3456 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3458 case NVPTXISD::TexUnified3DU32S32:
3459 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3461 case NVPTXISD::TexUnified3DU32Float:
3462 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3464 case NVPTXISD::TexUnified3DU32FloatLevel:
3465 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3467 case NVPTXISD::TexUnified3DU32FloatGrad:
3468 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3470 case NVPTXISD::TexUnifiedCubeFloatFloat:
3471 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3473 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3474 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3476 case NVPTXISD::TexUnifiedCubeS32Float:
3477 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3479 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3480 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3482 case NVPTXISD::TexUnifiedCubeU32Float:
3483 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3485 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3486 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3488 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3489 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3491 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3492 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3494 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3495 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3497 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3498 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3500 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3501 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3503 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3504 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3506 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3507 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3509 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3510 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3512 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3513 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3515 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3516 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3518 case NVPTXISD::Tld4UnifiedR2DS64Float:
3519 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3521 case NVPTXISD::Tld4UnifiedG2DS64Float:
3522 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3524 case NVPTXISD::Tld4UnifiedB2DS64Float:
3525 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3527 case NVPTXISD::Tld4UnifiedA2DS64Float:
3528 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3530 case NVPTXISD::Tld4UnifiedR2DU64Float:
3531 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3533 case NVPTXISD::Tld4UnifiedG2DU64Float:
3534 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3536 case NVPTXISD::Tld4UnifiedB2DU64Float:
3537 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3539 case NVPTXISD::Tld4UnifiedA2DU64Float:
3540 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3544 // Copy over operands
3545 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3546 Ops.push_back(N->getOperand(i));
3549 Ops.push_back(Chain);
3550 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3554 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3555 SDValue Chain = N->getOperand(0);
3556 SDValue TexHandle = N->getOperand(1);
3557 SDNode *Ret = nullptr;
3559 SmallVector<SDValue, 8> Ops;
3560 switch (N->getOpcode()) {
3561 default: return nullptr;
3562 case NVPTXISD::Suld1DI8Clamp:
3563 Opc = NVPTX::SULD_1D_I8_CLAMP;
3564 Ops.push_back(TexHandle);
3565 Ops.push_back(N->getOperand(2));
3566 Ops.push_back(Chain);
3568 case NVPTXISD::Suld1DI16Clamp:
3569 Opc = NVPTX::SULD_1D_I16_CLAMP;
3570 Ops.push_back(TexHandle);
3571 Ops.push_back(N->getOperand(2));
3572 Ops.push_back(Chain);
3574 case NVPTXISD::Suld1DI32Clamp:
3575 Opc = NVPTX::SULD_1D_I32_CLAMP;
3576 Ops.push_back(TexHandle);
3577 Ops.push_back(N->getOperand(2));
3578 Ops.push_back(Chain);
3580 case NVPTXISD::Suld1DI64Clamp:
3581 Opc = NVPTX::SULD_1D_I64_CLAMP;
3582 Ops.push_back(TexHandle);
3583 Ops.push_back(N->getOperand(2));
3584 Ops.push_back(Chain);
3586 case NVPTXISD::Suld1DV2I8Clamp:
3587 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3588 Ops.push_back(TexHandle);
3589 Ops.push_back(N->getOperand(2));
3590 Ops.push_back(Chain);
3592 case NVPTXISD::Suld1DV2I16Clamp:
3593 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3594 Ops.push_back(TexHandle);
3595 Ops.push_back(N->getOperand(2));
3596 Ops.push_back(Chain);
3598 case NVPTXISD::Suld1DV2I32Clamp:
3599 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3600 Ops.push_back(TexHandle);
3601 Ops.push_back(N->getOperand(2));
3602 Ops.push_back(Chain);
3604 case NVPTXISD::Suld1DV2I64Clamp:
3605 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3606 Ops.push_back(TexHandle);
3607 Ops.push_back(N->getOperand(2));
3608 Ops.push_back(Chain);
3610 case NVPTXISD::Suld1DV4I8Clamp:
3611 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3612 Ops.push_back(TexHandle);
3613 Ops.push_back(N->getOperand(2));
3614 Ops.push_back(Chain);
3616 case NVPTXISD::Suld1DV4I16Clamp:
3617 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3618 Ops.push_back(TexHandle);
3619 Ops.push_back(N->getOperand(2));
3620 Ops.push_back(Chain);
3622 case NVPTXISD::Suld1DV4I32Clamp:
3623 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3624 Ops.push_back(TexHandle);
3625 Ops.push_back(N->getOperand(2));
3626 Ops.push_back(Chain);
3628 case NVPTXISD::Suld1DArrayI8Clamp:
3629 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3630 Ops.push_back(TexHandle);
3631 Ops.push_back(N->getOperand(2));
3632 Ops.push_back(N->getOperand(3));
3633 Ops.push_back(Chain);
3635 case NVPTXISD::Suld1DArrayI16Clamp:
3636 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3637 Ops.push_back(TexHandle);
3638 Ops.push_back(N->getOperand(2));
3639 Ops.push_back(N->getOperand(3));
3640 Ops.push_back(Chain);
3642 case NVPTXISD::Suld1DArrayI32Clamp:
3643 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3644 Ops.push_back(TexHandle);
3645 Ops.push_back(N->getOperand(2));
3646 Ops.push_back(N->getOperand(3));
3647 Ops.push_back(Chain);
3649 case NVPTXISD::Suld1DArrayI64Clamp:
3650 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3651 Ops.push_back(TexHandle);
3652 Ops.push_back(N->getOperand(2));
3653 Ops.push_back(N->getOperand(3));
3654 Ops.push_back(Chain);
3656 case NVPTXISD::Suld1DArrayV2I8Clamp:
3657 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3658 Ops.push_back(TexHandle);
3659 Ops.push_back(N->getOperand(2));
3660 Ops.push_back(N->getOperand(3));
3661 Ops.push_back(Chain);
3663 case NVPTXISD::Suld1DArrayV2I16Clamp:
3664 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3665 Ops.push_back(TexHandle);
3666 Ops.push_back(N->getOperand(2));
3667 Ops.push_back(N->getOperand(3));
3668 Ops.push_back(Chain);
3670 case NVPTXISD::Suld1DArrayV2I32Clamp:
3671 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3672 Ops.push_back(TexHandle);
3673 Ops.push_back(N->getOperand(2));
3674 Ops.push_back(N->getOperand(3));
3675 Ops.push_back(Chain);
3677 case NVPTXISD::Suld1DArrayV2I64Clamp:
3678 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3679 Ops.push_back(TexHandle);
3680 Ops.push_back(N->getOperand(2));
3681 Ops.push_back(N->getOperand(3));
3682 Ops.push_back(Chain);
3684 case NVPTXISD::Suld1DArrayV4I8Clamp:
3685 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3686 Ops.push_back(TexHandle);
3687 Ops.push_back(N->getOperand(2));
3688 Ops.push_back(N->getOperand(3));
3689 Ops.push_back(Chain);
3691 case NVPTXISD::Suld1DArrayV4I16Clamp:
3692 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3693 Ops.push_back(TexHandle);
3694 Ops.push_back(N->getOperand(2));
3695 Ops.push_back(N->getOperand(3));
3696 Ops.push_back(Chain);
3698 case NVPTXISD::Suld1DArrayV4I32Clamp:
3699 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3700 Ops.push_back(TexHandle);
3701 Ops.push_back(N->getOperand(2));
3702 Ops.push_back(N->getOperand(3));
3703 Ops.push_back(Chain);
3705 case NVPTXISD::Suld2DI8Clamp:
3706 Opc = NVPTX::SULD_2D_I8_CLAMP;
3707 Ops.push_back(TexHandle);
3708 Ops.push_back(N->getOperand(2));
3709 Ops.push_back(N->getOperand(3));
3710 Ops.push_back(Chain);
3712 case NVPTXISD::Suld2DI16Clamp:
3713 Opc = NVPTX::SULD_2D_I16_CLAMP;
3714 Ops.push_back(TexHandle);
3715 Ops.push_back(N->getOperand(2));
3716 Ops.push_back(N->getOperand(3));
3717 Ops.push_back(Chain);
3719 case NVPTXISD::Suld2DI32Clamp:
3720 Opc = NVPTX::SULD_2D_I32_CLAMP;
3721 Ops.push_back(TexHandle);
3722 Ops.push_back(N->getOperand(2));
3723 Ops.push_back(N->getOperand(3));
3724 Ops.push_back(Chain);
3726 case NVPTXISD::Suld2DI64Clamp:
3727 Opc = NVPTX::SULD_2D_I64_CLAMP;
3728 Ops.push_back(TexHandle);
3729 Ops.push_back(N->getOperand(2));
3730 Ops.push_back(N->getOperand(3));
3731 Ops.push_back(Chain);
3733 case NVPTXISD::Suld2DV2I8Clamp:
3734 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3735 Ops.push_back(TexHandle);
3736 Ops.push_back(N->getOperand(2));
3737 Ops.push_back(N->getOperand(3));
3738 Ops.push_back(Chain);
3740 case NVPTXISD::Suld2DV2I16Clamp:
3741 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3742 Ops.push_back(TexHandle);
3743 Ops.push_back(N->getOperand(2));
3744 Ops.push_back(N->getOperand(3));
3745 Ops.push_back(Chain);
3747 case NVPTXISD::Suld2DV2I32Clamp:
3748 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3749 Ops.push_back(TexHandle);
3750 Ops.push_back(N->getOperand(2));
3751 Ops.push_back(N->getOperand(3));
3752 Ops.push_back(Chain);
3754 case NVPTXISD::Suld2DV2I64Clamp:
3755 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3756 Ops.push_back(TexHandle);
3757 Ops.push_back(N->getOperand(2));
3758 Ops.push_back(N->getOperand(3));
3759 Ops.push_back(Chain);
3761 case NVPTXISD::Suld2DV4I8Clamp:
3762 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3763 Ops.push_back(TexHandle);
3764 Ops.push_back(N->getOperand(2));
3765 Ops.push_back(N->getOperand(3));
3766 Ops.push_back(Chain);
3768 case NVPTXISD::Suld2DV4I16Clamp:
3769 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3770 Ops.push_back(TexHandle);
3771 Ops.push_back(N->getOperand(2));
3772 Ops.push_back(N->getOperand(3));
3773 Ops.push_back(Chain);
3775 case NVPTXISD::Suld2DV4I32Clamp:
3776 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3777 Ops.push_back(TexHandle);
3778 Ops.push_back(N->getOperand(2));
3779 Ops.push_back(N->getOperand(3));
3780 Ops.push_back(Chain);
3782 case NVPTXISD::Suld2DArrayI8Clamp:
3783 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3784 Ops.push_back(TexHandle);
3785 Ops.push_back(N->getOperand(2));
3786 Ops.push_back(N->getOperand(3));
3787 Ops.push_back(N->getOperand(4));
3788 Ops.push_back(Chain);
3790 case NVPTXISD::Suld2DArrayI16Clamp:
3791 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3792 Ops.push_back(TexHandle);
3793 Ops.push_back(N->getOperand(2));
3794 Ops.push_back(N->getOperand(3));
3795 Ops.push_back(N->getOperand(4));
3796 Ops.push_back(Chain);
3798 case NVPTXISD::Suld2DArrayI32Clamp:
3799 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3800 Ops.push_back(TexHandle);
3801 Ops.push_back(N->getOperand(2));
3802 Ops.push_back(N->getOperand(3));
3803 Ops.push_back(N->getOperand(4));
3804 Ops.push_back(Chain);
3806 case NVPTXISD::Suld2DArrayI64Clamp:
3807 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3808 Ops.push_back(TexHandle);
3809 Ops.push_back(N->getOperand(2));
3810 Ops.push_back(N->getOperand(3));
3811 Ops.push_back(N->getOperand(4));
3812 Ops.push_back(Chain);
3814 case NVPTXISD::Suld2DArrayV2I8Clamp:
3815 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3816 Ops.push_back(TexHandle);
3817 Ops.push_back(N->getOperand(2));
3818 Ops.push_back(N->getOperand(3));
3819 Ops.push_back(N->getOperand(4));
3820 Ops.push_back(Chain);
3822 case NVPTXISD::Suld2DArrayV2I16Clamp:
3823 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3824 Ops.push_back(TexHandle);
3825 Ops.push_back(N->getOperand(2));
3826 Ops.push_back(N->getOperand(3));
3827 Ops.push_back(N->getOperand(4));
3828 Ops.push_back(Chain);
3830 case NVPTXISD::Suld2DArrayV2I32Clamp:
3831 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3832 Ops.push_back(TexHandle);
3833 Ops.push_back(N->getOperand(2));
3834 Ops.push_back(N->getOperand(3));
3835 Ops.push_back(N->getOperand(4));
3836 Ops.push_back(Chain);
3838 case NVPTXISD::Suld2DArrayV2I64Clamp:
3839 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3840 Ops.push_back(TexHandle);
3841 Ops.push_back(N->getOperand(2));
3842 Ops.push_back(N->getOperand(3));
3843 Ops.push_back(N->getOperand(4));
3844 Ops.push_back(Chain);
3846 case NVPTXISD::Suld2DArrayV4I8Clamp:
3847 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3848 Ops.push_back(TexHandle);
3849 Ops.push_back(N->getOperand(2));
3850 Ops.push_back(N->getOperand(3));
3851 Ops.push_back(N->getOperand(4));
3852 Ops.push_back(Chain);
3854 case NVPTXISD::Suld2DArrayV4I16Clamp:
3855 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3856 Ops.push_back(TexHandle);
3857 Ops.push_back(N->getOperand(2));
3858 Ops.push_back(N->getOperand(3));
3859 Ops.push_back(N->getOperand(4));
3860 Ops.push_back(Chain);
3862 case NVPTXISD::Suld2DArrayV4I32Clamp:
3863 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3864 Ops.push_back(TexHandle);
3865 Ops.push_back(N->getOperand(2));
3866 Ops.push_back(N->getOperand(3));
3867 Ops.push_back(N->getOperand(4));
3868 Ops.push_back(Chain);
3870 case NVPTXISD::Suld3DI8Clamp:
3871 Opc = NVPTX::SULD_3D_I8_CLAMP;
3872 Ops.push_back(TexHandle);
3873 Ops.push_back(N->getOperand(2));
3874 Ops.push_back(N->getOperand(3));
3875 Ops.push_back(N->getOperand(4));
3876 Ops.push_back(Chain);
3878 case NVPTXISD::Suld3DI16Clamp:
3879 Opc = NVPTX::SULD_3D_I16_CLAMP;
3880 Ops.push_back(TexHandle);
3881 Ops.push_back(N->getOperand(2));
3882 Ops.push_back(N->getOperand(3));
3883 Ops.push_back(N->getOperand(4));
3884 Ops.push_back(Chain);
3886 case NVPTXISD::Suld3DI32Clamp:
3887 Opc = NVPTX::SULD_3D_I32_CLAMP;
3888 Ops.push_back(TexHandle);
3889 Ops.push_back(N->getOperand(2));
3890 Ops.push_back(N->getOperand(3));
3891 Ops.push_back(N->getOperand(4));
3892 Ops.push_back(Chain);
3894 case NVPTXISD::Suld3DI64Clamp:
3895 Opc = NVPTX::SULD_3D_I64_CLAMP;
3896 Ops.push_back(TexHandle);
3897 Ops.push_back(N->getOperand(2));
3898 Ops.push_back(N->getOperand(3));
3899 Ops.push_back(N->getOperand(4));
3900 Ops.push_back(Chain);
3902 case NVPTXISD::Suld3DV2I8Clamp:
3903 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3904 Ops.push_back(TexHandle);
3905 Ops.push_back(N->getOperand(2));
3906 Ops.push_back(N->getOperand(3));
3907 Ops.push_back(N->getOperand(4));
3908 Ops.push_back(Chain);
3910 case NVPTXISD::Suld3DV2I16Clamp:
3911 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3912 Ops.push_back(TexHandle);
3913 Ops.push_back(N->getOperand(2));
3914 Ops.push_back(N->getOperand(3));
3915 Ops.push_back(N->getOperand(4));
3916 Ops.push_back(Chain);
3918 case NVPTXISD::Suld3DV2I32Clamp:
3919 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3920 Ops.push_back(TexHandle);
3921 Ops.push_back(N->getOperand(2));
3922 Ops.push_back(N->getOperand(3));
3923 Ops.push_back(N->getOperand(4));
3924 Ops.push_back(Chain);
3926 case NVPTXISD::Suld3DV2I64Clamp:
3927 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3928 Ops.push_back(TexHandle);
3929 Ops.push_back(N->getOperand(2));
3930 Ops.push_back(N->getOperand(3));
3931 Ops.push_back(N->getOperand(4));
3932 Ops.push_back(Chain);
3934 case NVPTXISD::Suld3DV4I8Clamp:
3935 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3936 Ops.push_back(TexHandle);
3937 Ops.push_back(N->getOperand(2));
3938 Ops.push_back(N->getOperand(3));
3939 Ops.push_back(N->getOperand(4));
3940 Ops.push_back(Chain);
3942 case NVPTXISD::Suld3DV4I16Clamp:
3943 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3944 Ops.push_back(TexHandle);
3945 Ops.push_back(N->getOperand(2));
3946 Ops.push_back(N->getOperand(3));
3947 Ops.push_back(N->getOperand(4));
3948 Ops.push_back(Chain);
3950 case NVPTXISD::Suld3DV4I32Clamp:
3951 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3952 Ops.push_back(TexHandle);
3953 Ops.push_back(N->getOperand(2));
3954 Ops.push_back(N->getOperand(3));
3955 Ops.push_back(N->getOperand(4));
3956 Ops.push_back(Chain);
3958 case NVPTXISD::Suld1DI8Trap:
3959 Opc = NVPTX::SULD_1D_I8_TRAP;
3960 Ops.push_back(TexHandle);
3961 Ops.push_back(N->getOperand(2));
3962 Ops.push_back(Chain);
3964 case NVPTXISD::Suld1DI16Trap:
3965 Opc = NVPTX::SULD_1D_I16_TRAP;
3966 Ops.push_back(TexHandle);
3967 Ops.push_back(N->getOperand(2));
3968 Ops.push_back(Chain);
3970 case NVPTXISD::Suld1DI32Trap:
3971 Opc = NVPTX::SULD_1D_I32_TRAP;
3972 Ops.push_back(TexHandle);
3973 Ops.push_back(N->getOperand(2));
3974 Ops.push_back(Chain);
3976 case NVPTXISD::Suld1DI64Trap:
3977 Opc = NVPTX::SULD_1D_I64_TRAP;
3978 Ops.push_back(TexHandle);
3979 Ops.push_back(N->getOperand(2));
3980 Ops.push_back(Chain);
3982 case NVPTXISD::Suld1DV2I8Trap:
3983 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3984 Ops.push_back(TexHandle);
3985 Ops.push_back(N->getOperand(2));
3986 Ops.push_back(Chain);
3988 case NVPTXISD::Suld1DV2I16Trap:
3989 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3990 Ops.push_back(TexHandle);
3991 Ops.push_back(N->getOperand(2));
3992 Ops.push_back(Chain);
3994 case NVPTXISD::Suld1DV2I32Trap:
3995 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3996 Ops.push_back(TexHandle);
3997 Ops.push_back(N->getOperand(2));
3998 Ops.push_back(Chain);
4000 case NVPTXISD::Suld1DV2I64Trap:
4001 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4002 Ops.push_back(TexHandle);
4003 Ops.push_back(N->getOperand(2));
4004 Ops.push_back(Chain);
4006 case NVPTXISD::Suld1DV4I8Trap:
4007 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4008 Ops.push_back(TexHandle);
4009 Ops.push_back(N->getOperand(2));
4010 Ops.push_back(Chain);
4012 case NVPTXISD::Suld1DV4I16Trap:
4013 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4014 Ops.push_back(TexHandle);
4015 Ops.push_back(N->getOperand(2));
4016 Ops.push_back(Chain);
4018 case NVPTXISD::Suld1DV4I32Trap:
4019 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4020 Ops.push_back(TexHandle);
4021 Ops.push_back(N->getOperand(2));
4022 Ops.push_back(Chain);
4024 case NVPTXISD::Suld1DArrayI8Trap:
4025 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4026 Ops.push_back(TexHandle);
4027 Ops.push_back(N->getOperand(2));
4028 Ops.push_back(N->getOperand(3));
4029 Ops.push_back(Chain);
4031 case NVPTXISD::Suld1DArrayI16Trap:
4032 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4033 Ops.push_back(TexHandle);
4034 Ops.push_back(N->getOperand(2));
4035 Ops.push_back(N->getOperand(3));
4036 Ops.push_back(Chain);
4038 case NVPTXISD::Suld1DArrayI32Trap:
4039 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4040 Ops.push_back(TexHandle);
4041 Ops.push_back(N->getOperand(2));
4042 Ops.push_back(N->getOperand(3));
4043 Ops.push_back(Chain);
4045 case NVPTXISD::Suld1DArrayI64Trap:
4046 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4047 Ops.push_back(TexHandle);
4048 Ops.push_back(N->getOperand(2));
4049 Ops.push_back(N->getOperand(3));
4050 Ops.push_back(Chain);
4052 case NVPTXISD::Suld1DArrayV2I8Trap:
4053 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4054 Ops.push_back(TexHandle);
4055 Ops.push_back(N->getOperand(2));
4056 Ops.push_back(N->getOperand(3));
4057 Ops.push_back(Chain);
4059 case NVPTXISD::Suld1DArrayV2I16Trap:
4060 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4061 Ops.push_back(TexHandle);
4062 Ops.push_back(N->getOperand(2));
4063 Ops.push_back(N->getOperand(3));
4064 Ops.push_back(Chain);
4066 case NVPTXISD::Suld1DArrayV2I32Trap:
4067 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4068 Ops.push_back(TexHandle);
4069 Ops.push_back(N->getOperand(2));
4070 Ops.push_back(N->getOperand(3));
4071 Ops.push_back(Chain);
4073 case NVPTXISD::Suld1DArrayV2I64Trap:
4074 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4075 Ops.push_back(TexHandle);
4076 Ops.push_back(N->getOperand(2));
4077 Ops.push_back(N->getOperand(3));
4078 Ops.push_back(Chain);
4080 case NVPTXISD::Suld1DArrayV4I8Trap:
4081 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4082 Ops.push_back(TexHandle);
4083 Ops.push_back(N->getOperand(2));
4084 Ops.push_back(N->getOperand(3));
4085 Ops.push_back(Chain);
4087 case NVPTXISD::Suld1DArrayV4I16Trap:
4088 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4089 Ops.push_back(TexHandle);
4090 Ops.push_back(N->getOperand(2));
4091 Ops.push_back(N->getOperand(3));
4092 Ops.push_back(Chain);
4094 case NVPTXISD::Suld1DArrayV4I32Trap:
4095 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4096 Ops.push_back(TexHandle);
4097 Ops.push_back(N->getOperand(2));
4098 Ops.push_back(N->getOperand(3));
4099 Ops.push_back(Chain);
4101 case NVPTXISD::Suld2DI8Trap:
4102 Opc = NVPTX::SULD_2D_I8_TRAP;
4103 Ops.push_back(TexHandle);
4104 Ops.push_back(N->getOperand(2));
4105 Ops.push_back(N->getOperand(3));
4106 Ops.push_back(Chain);
4108 case NVPTXISD::Suld2DI16Trap:
4109 Opc = NVPTX::SULD_2D_I16_TRAP;
4110 Ops.push_back(TexHandle);
4111 Ops.push_back(N->getOperand(2));
4112 Ops.push_back(N->getOperand(3));
4113 Ops.push_back(Chain);
4115 case NVPTXISD::Suld2DI32Trap:
4116 Opc = NVPTX::SULD_2D_I32_TRAP;
4117 Ops.push_back(TexHandle);
4118 Ops.push_back(N->getOperand(2));
4119 Ops.push_back(N->getOperand(3));
4120 Ops.push_back(Chain);
4122 case NVPTXISD::Suld2DI64Trap:
4123 Opc = NVPTX::SULD_2D_I64_TRAP;
4124 Ops.push_back(TexHandle);
4125 Ops.push_back(N->getOperand(2));
4126 Ops.push_back(N->getOperand(3));
4127 Ops.push_back(Chain);
4129 case NVPTXISD::Suld2DV2I8Trap:
4130 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4131 Ops.push_back(TexHandle);
4132 Ops.push_back(N->getOperand(2));
4133 Ops.push_back(N->getOperand(3));
4134 Ops.push_back(Chain);
4136 case NVPTXISD::Suld2DV2I16Trap:
4137 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4138 Ops.push_back(TexHandle);
4139 Ops.push_back(N->getOperand(2));
4140 Ops.push_back(N->getOperand(3));
4141 Ops.push_back(Chain);
4143 case NVPTXISD::Suld2DV2I32Trap:
4144 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4145 Ops.push_back(TexHandle);
4146 Ops.push_back(N->getOperand(2));
4147 Ops.push_back(N->getOperand(3));
4148 Ops.push_back(Chain);
4150 case NVPTXISD::Suld2DV2I64Trap:
4151 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4152 Ops.push_back(TexHandle);
4153 Ops.push_back(N->getOperand(2));
4154 Ops.push_back(N->getOperand(3));
4155 Ops.push_back(Chain);
4157 case NVPTXISD::Suld2DV4I8Trap:
4158 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4159 Ops.push_back(TexHandle);
4160 Ops.push_back(N->getOperand(2));
4161 Ops.push_back(N->getOperand(3));
4162 Ops.push_back(Chain);
4164 case NVPTXISD::Suld2DV4I16Trap:
4165 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4166 Ops.push_back(TexHandle);
4167 Ops.push_back(N->getOperand(2));
4168 Ops.push_back(N->getOperand(3));
4169 Ops.push_back(Chain);
4171 case NVPTXISD::Suld2DV4I32Trap:
4172 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4173 Ops.push_back(TexHandle);
4174 Ops.push_back(N->getOperand(2));
4175 Ops.push_back(N->getOperand(3));
4176 Ops.push_back(Chain);
4178 case NVPTXISD::Suld2DArrayI8Trap:
4179 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4180 Ops.push_back(TexHandle);
4181 Ops.push_back(N->getOperand(2));
4182 Ops.push_back(N->getOperand(3));
4183 Ops.push_back(N->getOperand(4));
4184 Ops.push_back(Chain);
4186 case NVPTXISD::Suld2DArrayI16Trap:
4187 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4188 Ops.push_back(TexHandle);
4189 Ops.push_back(N->getOperand(2));
4190 Ops.push_back(N->getOperand(3));
4191 Ops.push_back(N->getOperand(4));
4192 Ops.push_back(Chain);
4194 case NVPTXISD::Suld2DArrayI32Trap:
4195 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4196 Ops.push_back(TexHandle);
4197 Ops.push_back(N->getOperand(2));
4198 Ops.push_back(N->getOperand(3));
4199 Ops.push_back(N->getOperand(4));
4200 Ops.push_back(Chain);
4202 case NVPTXISD::Suld2DArrayI64Trap:
4203 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4204 Ops.push_back(TexHandle);
4205 Ops.push_back(N->getOperand(2));
4206 Ops.push_back(N->getOperand(3));
4207 Ops.push_back(N->getOperand(4));
4208 Ops.push_back(Chain);
4210 case NVPTXISD::Suld2DArrayV2I8Trap:
4211 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4212 Ops.push_back(TexHandle);
4213 Ops.push_back(N->getOperand(2));
4214 Ops.push_back(N->getOperand(3));
4215 Ops.push_back(N->getOperand(4));
4216 Ops.push_back(Chain);
4218 case NVPTXISD::Suld2DArrayV2I16Trap:
4219 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4220 Ops.push_back(TexHandle);
4221 Ops.push_back(N->getOperand(2));
4222 Ops.push_back(N->getOperand(3));
4223 Ops.push_back(N->getOperand(4));
4224 Ops.push_back(Chain);
4226 case NVPTXISD::Suld2DArrayV2I32Trap:
4227 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4228 Ops.push_back(TexHandle);
4229 Ops.push_back(N->getOperand(2));
4230 Ops.push_back(N->getOperand(3));
4231 Ops.push_back(N->getOperand(4));
4232 Ops.push_back(Chain);
4234 case NVPTXISD::Suld2DArrayV2I64Trap:
4235 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4236 Ops.push_back(TexHandle);
4237 Ops.push_back(N->getOperand(2));
4238 Ops.push_back(N->getOperand(3));
4239 Ops.push_back(N->getOperand(4));
4240 Ops.push_back(Chain);
4242 case NVPTXISD::Suld2DArrayV4I8Trap:
4243 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4244 Ops.push_back(TexHandle);
4245 Ops.push_back(N->getOperand(2));
4246 Ops.push_back(N->getOperand(3));
4247 Ops.push_back(N->getOperand(4));
4248 Ops.push_back(Chain);
4250 case NVPTXISD::Suld2DArrayV4I16Trap:
4251 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4252 Ops.push_back(TexHandle);
4253 Ops.push_back(N->getOperand(2));
4254 Ops.push_back(N->getOperand(3));
4255 Ops.push_back(N->getOperand(4));
4256 Ops.push_back(Chain);
4258 case NVPTXISD::Suld2DArrayV4I32Trap:
4259 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4260 Ops.push_back(TexHandle);
4261 Ops.push_back(N->getOperand(2));
4262 Ops.push_back(N->getOperand(3));
4263 Ops.push_back(N->getOperand(4));
4264 Ops.push_back(Chain);
4266 case NVPTXISD::Suld3DI8Trap:
4267 Opc = NVPTX::SULD_3D_I8_TRAP;
4268 Ops.push_back(TexHandle);
4269 Ops.push_back(N->getOperand(2));
4270 Ops.push_back(N->getOperand(3));
4271 Ops.push_back(N->getOperand(4));
4272 Ops.push_back(Chain);
4274 case NVPTXISD::Suld3DI16Trap:
4275 Opc = NVPTX::SULD_3D_I16_TRAP;
4276 Ops.push_back(TexHandle);
4277 Ops.push_back(N->getOperand(2));
4278 Ops.push_back(N->getOperand(3));
4279 Ops.push_back(N->getOperand(4));
4280 Ops.push_back(Chain);
4282 case NVPTXISD::Suld3DI32Trap:
4283 Opc = NVPTX::SULD_3D_I32_TRAP;
4284 Ops.push_back(TexHandle);
4285 Ops.push_back(N->getOperand(2));
4286 Ops.push_back(N->getOperand(3));
4287 Ops.push_back(N->getOperand(4));
4288 Ops.push_back(Chain);
4290 case NVPTXISD::Suld3DI64Trap:
4291 Opc = NVPTX::SULD_3D_I64_TRAP;
4292 Ops.push_back(TexHandle);
4293 Ops.push_back(N->getOperand(2));
4294 Ops.push_back(N->getOperand(3));
4295 Ops.push_back(N->getOperand(4));
4296 Ops.push_back(Chain);
4298 case NVPTXISD::Suld3DV2I8Trap:
4299 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4300 Ops.push_back(TexHandle);
4301 Ops.push_back(N->getOperand(2));
4302 Ops.push_back(N->getOperand(3));
4303 Ops.push_back(N->getOperand(4));
4304 Ops.push_back(Chain);
4306 case NVPTXISD::Suld3DV2I16Trap:
4307 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4308 Ops.push_back(TexHandle);
4309 Ops.push_back(N->getOperand(2));
4310 Ops.push_back(N->getOperand(3));
4311 Ops.push_back(N->getOperand(4));
4312 Ops.push_back(Chain);
4314 case NVPTXISD::Suld3DV2I32Trap:
4315 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4316 Ops.push_back(TexHandle);
4317 Ops.push_back(N->getOperand(2));
4318 Ops.push_back(N->getOperand(3));
4319 Ops.push_back(N->getOperand(4));
4320 Ops.push_back(Chain);
4322 case NVPTXISD::Suld3DV2I64Trap:
4323 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4324 Ops.push_back(TexHandle);
4325 Ops.push_back(N->getOperand(2));
4326 Ops.push_back(N->getOperand(3));
4327 Ops.push_back(N->getOperand(4));
4328 Ops.push_back(Chain);
4330 case NVPTXISD::Suld3DV4I8Trap:
4331 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4332 Ops.push_back(TexHandle);
4333 Ops.push_back(N->getOperand(2));
4334 Ops.push_back(N->getOperand(3));
4335 Ops.push_back(N->getOperand(4));
4336 Ops.push_back(Chain);
4338 case NVPTXISD::Suld3DV4I16Trap:
4339 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4340 Ops.push_back(TexHandle);
4341 Ops.push_back(N->getOperand(2));
4342 Ops.push_back(N->getOperand(3));
4343 Ops.push_back(N->getOperand(4));
4344 Ops.push_back(Chain);
4346 case NVPTXISD::Suld3DV4I32Trap:
4347 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4348 Ops.push_back(TexHandle);
4349 Ops.push_back(N->getOperand(2));
4350 Ops.push_back(N->getOperand(3));
4351 Ops.push_back(N->getOperand(4));
4352 Ops.push_back(Chain);
4354 case NVPTXISD::Suld1DI8Zero:
4355 Opc = NVPTX::SULD_1D_I8_ZERO;
4356 Ops.push_back(TexHandle);
4357 Ops.push_back(N->getOperand(2));
4358 Ops.push_back(Chain);
4360 case NVPTXISD::Suld1DI16Zero:
4361 Opc = NVPTX::SULD_1D_I16_ZERO;
4362 Ops.push_back(TexHandle);
4363 Ops.push_back(N->getOperand(2));
4364 Ops.push_back(Chain);
4366 case NVPTXISD::Suld1DI32Zero:
4367 Opc = NVPTX::SULD_1D_I32_ZERO;
4368 Ops.push_back(TexHandle);
4369 Ops.push_back(N->getOperand(2));
4370 Ops.push_back(Chain);
4372 case NVPTXISD::Suld1DI64Zero:
4373 Opc = NVPTX::SULD_1D_I64_ZERO;
4374 Ops.push_back(TexHandle);
4375 Ops.push_back(N->getOperand(2));
4376 Ops.push_back(Chain);
4378 case NVPTXISD::Suld1DV2I8Zero:
4379 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4380 Ops.push_back(TexHandle);
4381 Ops.push_back(N->getOperand(2));
4382 Ops.push_back(Chain);
4384 case NVPTXISD::Suld1DV2I16Zero:
4385 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4386 Ops.push_back(TexHandle);
4387 Ops.push_back(N->getOperand(2));
4388 Ops.push_back(Chain);
4390 case NVPTXISD::Suld1DV2I32Zero:
4391 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4392 Ops.push_back(TexHandle);
4393 Ops.push_back(N->getOperand(2));
4394 Ops.push_back(Chain);
4396 case NVPTXISD::Suld1DV2I64Zero:
4397 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4398 Ops.push_back(TexHandle);
4399 Ops.push_back(N->getOperand(2));
4400 Ops.push_back(Chain);
4402 case NVPTXISD::Suld1DV4I8Zero:
4403 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4404 Ops.push_back(TexHandle);
4405 Ops.push_back(N->getOperand(2));
4406 Ops.push_back(Chain);
4408 case NVPTXISD::Suld1DV4I16Zero:
4409 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4410 Ops.push_back(TexHandle);
4411 Ops.push_back(N->getOperand(2));
4412 Ops.push_back(Chain);
4414 case NVPTXISD::Suld1DV4I32Zero:
4415 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4416 Ops.push_back(TexHandle);
4417 Ops.push_back(N->getOperand(2));
4418 Ops.push_back(Chain);
4420 case NVPTXISD::Suld1DArrayI8Zero:
4421 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4422 Ops.push_back(TexHandle);
4423 Ops.push_back(N->getOperand(2));
4424 Ops.push_back(N->getOperand(3));
4425 Ops.push_back(Chain);
4427 case NVPTXISD::Suld1DArrayI16Zero:
4428 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4429 Ops.push_back(TexHandle);
4430 Ops.push_back(N->getOperand(2));
4431 Ops.push_back(N->getOperand(3));
4432 Ops.push_back(Chain);
4434 case NVPTXISD::Suld1DArrayI32Zero:
4435 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4436 Ops.push_back(TexHandle);
4437 Ops.push_back(N->getOperand(2));
4438 Ops.push_back(N->getOperand(3));
4439 Ops.push_back(Chain);
4441 case NVPTXISD::Suld1DArrayI64Zero:
4442 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4443 Ops.push_back(TexHandle);
4444 Ops.push_back(N->getOperand(2));
4445 Ops.push_back(N->getOperand(3));
4446 Ops.push_back(Chain);
4448 case NVPTXISD::Suld1DArrayV2I8Zero:
4449 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4450 Ops.push_back(TexHandle);
4451 Ops.push_back(N->getOperand(2));
4452 Ops.push_back(N->getOperand(3));
4453 Ops.push_back(Chain);
4455 case NVPTXISD::Suld1DArrayV2I16Zero:
4456 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4457 Ops.push_back(TexHandle);
4458 Ops.push_back(N->getOperand(2));
4459 Ops.push_back(N->getOperand(3));
4460 Ops.push_back(Chain);
4462 case NVPTXISD::Suld1DArrayV2I32Zero:
4463 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4464 Ops.push_back(TexHandle);
4465 Ops.push_back(N->getOperand(2));
4466 Ops.push_back(N->getOperand(3));
4467 Ops.push_back(Chain);
4469 case NVPTXISD::Suld1DArrayV2I64Zero:
4470 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4471 Ops.push_back(TexHandle);
4472 Ops.push_back(N->getOperand(2));
4473 Ops.push_back(N->getOperand(3));
4474 Ops.push_back(Chain);
4476 case NVPTXISD::Suld1DArrayV4I8Zero:
4477 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4478 Ops.push_back(TexHandle);
4479 Ops.push_back(N->getOperand(2));
4480 Ops.push_back(N->getOperand(3));
4481 Ops.push_back(Chain);
4483 case NVPTXISD::Suld1DArrayV4I16Zero:
4484 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4485 Ops.push_back(TexHandle);
4486 Ops.push_back(N->getOperand(2));
4487 Ops.push_back(N->getOperand(3));
4488 Ops.push_back(Chain);
4490 case NVPTXISD::Suld1DArrayV4I32Zero:
4491 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4492 Ops.push_back(TexHandle);
4493 Ops.push_back(N->getOperand(2));
4494 Ops.push_back(N->getOperand(3));
4495 Ops.push_back(Chain);
4497 case NVPTXISD::Suld2DI8Zero:
4498 Opc = NVPTX::SULD_2D_I8_ZERO;
4499 Ops.push_back(TexHandle);
4500 Ops.push_back(N->getOperand(2));
4501 Ops.push_back(N->getOperand(3));
4502 Ops.push_back(Chain);
4504 case NVPTXISD::Suld2DI16Zero:
4505 Opc = NVPTX::SULD_2D_I16_ZERO;
4506 Ops.push_back(TexHandle);
4507 Ops.push_back(N->getOperand(2));
4508 Ops.push_back(N->getOperand(3));
4509 Ops.push_back(Chain);
4511 case NVPTXISD::Suld2DI32Zero:
4512 Opc = NVPTX::SULD_2D_I32_ZERO;
4513 Ops.push_back(TexHandle);
4514 Ops.push_back(N->getOperand(2));
4515 Ops.push_back(N->getOperand(3));
4516 Ops.push_back(Chain);
4518 case NVPTXISD::Suld2DI64Zero:
4519 Opc = NVPTX::SULD_2D_I64_ZERO;
4520 Ops.push_back(TexHandle);
4521 Ops.push_back(N->getOperand(2));
4522 Ops.push_back(N->getOperand(3));
4523 Ops.push_back(Chain);
4525 case NVPTXISD::Suld2DV2I8Zero:
4526 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4527 Ops.push_back(TexHandle);
4528 Ops.push_back(N->getOperand(2));
4529 Ops.push_back(N->getOperand(3));
4530 Ops.push_back(Chain);
4532 case NVPTXISD::Suld2DV2I16Zero:
4533 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4534 Ops.push_back(TexHandle);
4535 Ops.push_back(N->getOperand(2));
4536 Ops.push_back(N->getOperand(3));
4537 Ops.push_back(Chain);
4539 case NVPTXISD::Suld2DV2I32Zero:
4540 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4541 Ops.push_back(TexHandle);
4542 Ops.push_back(N->getOperand(2));
4543 Ops.push_back(N->getOperand(3));
4544 Ops.push_back(Chain);
4546 case NVPTXISD::Suld2DV2I64Zero:
4547 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4548 Ops.push_back(TexHandle);
4549 Ops.push_back(N->getOperand(2));
4550 Ops.push_back(N->getOperand(3));
4551 Ops.push_back(Chain);
4553 case NVPTXISD::Suld2DV4I8Zero:
4554 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4555 Ops.push_back(TexHandle);
4556 Ops.push_back(N->getOperand(2));
4557 Ops.push_back(N->getOperand(3));
4558 Ops.push_back(Chain);
4560 case NVPTXISD::Suld2DV4I16Zero:
4561 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4562 Ops.push_back(TexHandle);
4563 Ops.push_back(N->getOperand(2));
4564 Ops.push_back(N->getOperand(3));
4565 Ops.push_back(Chain);
4567 case NVPTXISD::Suld2DV4I32Zero:
4568 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4569 Ops.push_back(TexHandle);
4570 Ops.push_back(N->getOperand(2));
4571 Ops.push_back(N->getOperand(3));
4572 Ops.push_back(Chain);
4574 case NVPTXISD::Suld2DArrayI8Zero:
4575 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4576 Ops.push_back(TexHandle);
4577 Ops.push_back(N->getOperand(2));
4578 Ops.push_back(N->getOperand(3));
4579 Ops.push_back(N->getOperand(4));
4580 Ops.push_back(Chain);
4582 case NVPTXISD::Suld2DArrayI16Zero:
4583 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4584 Ops.push_back(TexHandle);
4585 Ops.push_back(N->getOperand(2));
4586 Ops.push_back(N->getOperand(3));
4587 Ops.push_back(N->getOperand(4));
4588 Ops.push_back(Chain);
4590 case NVPTXISD::Suld2DArrayI32Zero:
4591 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4592 Ops.push_back(TexHandle);
4593 Ops.push_back(N->getOperand(2));
4594 Ops.push_back(N->getOperand(3));
4595 Ops.push_back(N->getOperand(4));
4596 Ops.push_back(Chain);
4598 case NVPTXISD::Suld2DArrayI64Zero:
4599 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4600 Ops.push_back(TexHandle);
4601 Ops.push_back(N->getOperand(2));
4602 Ops.push_back(N->getOperand(3));
4603 Ops.push_back(N->getOperand(4));
4604 Ops.push_back(Chain);
4606 case NVPTXISD::Suld2DArrayV2I8Zero:
4607 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4608 Ops.push_back(TexHandle);
4609 Ops.push_back(N->getOperand(2));
4610 Ops.push_back(N->getOperand(3));
4611 Ops.push_back(N->getOperand(4));
4612 Ops.push_back(Chain);
4614 case NVPTXISD::Suld2DArrayV2I16Zero:
4615 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4616 Ops.push_back(TexHandle);
4617 Ops.push_back(N->getOperand(2));
4618 Ops.push_back(N->getOperand(3));
4619 Ops.push_back(N->getOperand(4));
4620 Ops.push_back(Chain);
4622 case NVPTXISD::Suld2DArrayV2I32Zero:
4623 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4624 Ops.push_back(TexHandle);
4625 Ops.push_back(N->getOperand(2));
4626 Ops.push_back(N->getOperand(3));
4627 Ops.push_back(N->getOperand(4));
4628 Ops.push_back(Chain);
4630 case NVPTXISD::Suld2DArrayV2I64Zero:
4631 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4632 Ops.push_back(TexHandle);
4633 Ops.push_back(N->getOperand(2));
4634 Ops.push_back(N->getOperand(3));
4635 Ops.push_back(N->getOperand(4));
4636 Ops.push_back(Chain);
4638 case NVPTXISD::Suld2DArrayV4I8Zero:
4639 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4640 Ops.push_back(TexHandle);
4641 Ops.push_back(N->getOperand(2));
4642 Ops.push_back(N->getOperand(3));
4643 Ops.push_back(N->getOperand(4));
4644 Ops.push_back(Chain);
4646 case NVPTXISD::Suld2DArrayV4I16Zero:
4647 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4648 Ops.push_back(TexHandle);
4649 Ops.push_back(N->getOperand(2));
4650 Ops.push_back(N->getOperand(3));
4651 Ops.push_back(N->getOperand(4));
4652 Ops.push_back(Chain);
4654 case NVPTXISD::Suld2DArrayV4I32Zero:
4655 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4656 Ops.push_back(TexHandle);
4657 Ops.push_back(N->getOperand(2));
4658 Ops.push_back(N->getOperand(3));
4659 Ops.push_back(N->getOperand(4));
4660 Ops.push_back(Chain);
4662 case NVPTXISD::Suld3DI8Zero:
4663 Opc = NVPTX::SULD_3D_I8_ZERO;
4664 Ops.push_back(TexHandle);
4665 Ops.push_back(N->getOperand(2));
4666 Ops.push_back(N->getOperand(3));
4667 Ops.push_back(N->getOperand(4));
4668 Ops.push_back(Chain);
4670 case NVPTXISD::Suld3DI16Zero:
4671 Opc = NVPTX::SULD_3D_I16_ZERO;
4672 Ops.push_back(TexHandle);
4673 Ops.push_back(N->getOperand(2));
4674 Ops.push_back(N->getOperand(3));
4675 Ops.push_back(N->getOperand(4));
4676 Ops.push_back(Chain);
4678 case NVPTXISD::Suld3DI32Zero:
4679 Opc = NVPTX::SULD_3D_I32_ZERO;
4680 Ops.push_back(TexHandle);
4681 Ops.push_back(N->getOperand(2));
4682 Ops.push_back(N->getOperand(3));
4683 Ops.push_back(N->getOperand(4));
4684 Ops.push_back(Chain);
4686 case NVPTXISD::Suld3DI64Zero:
4687 Opc = NVPTX::SULD_3D_I64_ZERO;
4688 Ops.push_back(TexHandle);
4689 Ops.push_back(N->getOperand(2));
4690 Ops.push_back(N->getOperand(3));
4691 Ops.push_back(N->getOperand(4));
4692 Ops.push_back(Chain);
4694 case NVPTXISD::Suld3DV2I8Zero:
4695 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4696 Ops.push_back(TexHandle);
4697 Ops.push_back(N->getOperand(2));
4698 Ops.push_back(N->getOperand(3));
4699 Ops.push_back(N->getOperand(4));
4700 Ops.push_back(Chain);
4702 case NVPTXISD::Suld3DV2I16Zero:
4703 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4704 Ops.push_back(TexHandle);
4705 Ops.push_back(N->getOperand(2));
4706 Ops.push_back(N->getOperand(3));
4707 Ops.push_back(N->getOperand(4));
4708 Ops.push_back(Chain);
4710 case NVPTXISD::Suld3DV2I32Zero:
4711 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4712 Ops.push_back(TexHandle);
4713 Ops.push_back(N->getOperand(2));
4714 Ops.push_back(N->getOperand(3));
4715 Ops.push_back(N->getOperand(4));
4716 Ops.push_back(Chain);
4718 case NVPTXISD::Suld3DV2I64Zero:
4719 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4720 Ops.push_back(TexHandle);
4721 Ops.push_back(N->getOperand(2));
4722 Ops.push_back(N->getOperand(3));
4723 Ops.push_back(N->getOperand(4));
4724 Ops.push_back(Chain);
4726 case NVPTXISD::Suld3DV4I8Zero:
4727 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4728 Ops.push_back(TexHandle);
4729 Ops.push_back(N->getOperand(2));
4730 Ops.push_back(N->getOperand(3));
4731 Ops.push_back(N->getOperand(4));
4732 Ops.push_back(Chain);
4734 case NVPTXISD::Suld3DV4I16Zero:
4735 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4736 Ops.push_back(TexHandle);
4737 Ops.push_back(N->getOperand(2));
4738 Ops.push_back(N->getOperand(3));
4739 Ops.push_back(N->getOperand(4));
4740 Ops.push_back(Chain);
4742 case NVPTXISD::Suld3DV4I32Zero:
4743 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4744 Ops.push_back(TexHandle);
4745 Ops.push_back(N->getOperand(2));
4746 Ops.push_back(N->getOperand(3));
4747 Ops.push_back(N->getOperand(4));
4748 Ops.push_back(Chain);
4751 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4756 /// SelectBFE - Look for instruction sequences that can be made more efficient
4757 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// Judging by the opcode tests below, two root shapes are examined:
///   * an ISD::AND with a constant mask whose other operand is an
///     ISD::SRL / ISD::SRA, and
///   * an ISD::SRL / ISD::SRA whose first operand is an ISD::AND or an
///     ISD::SHL.
/// The replacement machine node uses one of BFE_S32rii, BFE_U32rii,
/// BFE_S64rii or BFE_U64rii, picked from the value type of Val (i32 or i64).
///
/// NOTE(review): the numbering embedded in this listing skips entries all
/// through this function, and Len, Start, Val, Opc, NumBits (in the shift
/// branch), NumZeros and Ops are used without a visible declaration. Early
/// exits, closing braces and some assignments are presumably among the
/// entries not shown -- confirm against the complete file before editing.
4758 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// The two operands of the root node (the AND or the right shift).
4759 SDValue LHS = N->getOperand(0);
4760 SDValue RHS = N->getOperand(1);
// NOTE(review): presumably picks the BFE_S* opcodes over the BFE_U* ones
// further down; the condition that reads this flag is not visible here.
4764 bool IsSigned = false;
4766 if (N->getOpcode() == ISD::AND) {
4767 // Canonicalize the operands
4768 // We want 'and %val, %mask'
4769 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4770 std::swap(LHS, RHS);
4773 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4775 // We need a constant mask on the RHS of the AND
4779 // Extract the mask bits
4780 uint64_t MaskVal = Mask->getZExtValue();
4781 if (!isMask_64(MaskVal)) {
4782 // We *could* handle shifted masks here, but doing so would require an
4783 // 'and' operation to fix up the low-order bits so we would trade
4784 // shr+and for bfe+and, which has the same throughput
4788 // How many bits are in our mask?
4789 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
// The field length is the number of trailing one bits in the mask.
4790 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4792 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4793 // We have a 'srl/and' pair, extract the effective start bit and length
4794 Val = LHS.getNode()->getOperand(0);
4795 Start = LHS.getNode()->getOperand(1);
4796 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4798 uint64_t StartVal = StartConst->getZExtValue();
4799 // How many "good" bits do we have left? "good" is defined here as bits
4800 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of Start (the shift-amount
// operand), not of Val (the value being shifted). If the two types differ
// the bound is not the width of the shifted value, and the unsigned
// subtraction wraps when StartVal exceeds that width -- confirm intended.
4801 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4802 if (NumBits > GoodBits) {
4803 // Do not handle the case where bits have been shifted in. In theory
4804 // we could handle this, but the cost is likely higher than just
4805 // emitting the srl/and pair.
// The start position is re-materialized as an i32 target constant.
4808 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4810 // Do not handle the case where the shift amount (can be zero if no srl
4811 // was found) is not constant. We could handle this case, but it would
4812 // require run-time logic that would be more expensive than just
4813 // emitting the srl/and pair.
4817 // Do not handle the case where the LHS of the and is not a shift. While
4818 // it would be trivial to handle this case, it would just transform
4819 // 'and' -> 'bfe', but 'and' has higher-throughput.
4822 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
// Right shift whose input is an AND: (shr (and val, mask), amt).
4823 if (LHS->getOpcode() == ISD::AND) {
4824 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4826 // Shift amount must be constant
4830 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4832 SDValue AndLHS = LHS->getOperand(0);
4833 SDValue AndRHS = LHS->getOperand(1);
4835 // Canonicalize the AND to have the mask on the RHS
4836 if (isa<ConstantSDNode>(AndLHS)) {
4837 std::swap(AndLHS, AndRHS);
4840 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4842 // Mask must be constant
4846 uint64_t MaskVal = MaskCnst->getZExtValue();
4849 if (isMask_64(MaskVal)) {
4851 // The number of bits in the result bitfield will be the number of
4852 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): no guard is visible here that ShiftAmt does not exceed the
// number of trailing ones -- confirm this subtraction cannot wrap.
4853 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
4854 } else if (isShiftedMask_64(MaskVal)) {
4855 NumZeros = countTrailingZeros(MaskVal);
4856 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
4857 // The number of bits in the result bitfield will be the number of
4858 // trailing zeros plus the number of set bits in the mask minus the
4859 // number of bits we shift off
4860 NumBits = NumZeros + NumOnes - ShiftAmt;
4862 // This is not a mask we can handle
4866 if (ShiftAmt < NumZeros) {
4867 // Handling this case would require extra logic that would make this
4868 // transformation non-profitable
// Extraction starts at the shift amount and spans NumBits bits.
4873 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4874 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4875 } else if (LHS->getOpcode() == ISD::SHL) {
4876 // Here, we have a pattern like:
4878 // (sra (shl val, NN), MM)
4880 // (srl (shl val, NN), MM)
4882 // If MM >= NN, we can efficiently optimize this with bfe
4883 Val = LHS->getOperand(0);
4885 SDValue ShlRHS = LHS->getOperand(1);
4886 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4888 // Shift amount must be constant
4891 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4893 SDValue ShrRHS = RHS;
4894 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4896 // Shift amount must be constant
4899 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4901 // To avoid extra codegen and be profitable, we need Outer >= Inner
4902 if (OuterShiftAmt < InnerShiftAmt) {
4906 // If the outer shift is more than the type size, we have no bitfield to
4907 // extract (since we also check that the inner shift is <= the outer shift
4908 // then this also implies that the inner shift is < the type size)
4909 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// NOTE(review): the next two expressions are (outer - inner) and
// (type width - outer). Presumably they become the start and length
// operands, but their assignment targets are not visible in this listing.
4914 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4916 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4917 OuterShiftAmt, MVT::i32);
4919 if (N->getOpcode() == ISD::SRA) {
4920 // If we have an arithmetic right shift, we need to use the signed bfe
4935 // For the BFE operations we form here from "and" and "srl", always use the
4936 // unsigned variants.
// Opcode choice: 32-bit forms for i32 values, 64-bit forms for i64 values.
4937 if (Val.getValueType() == MVT::i32) {
4939 Opc = NVPTX::BFE_S32rii;
4941 Opc = NVPTX::BFE_U32rii;
4943 } else if (Val.getValueType() == MVT::i64) {
4945 Opc = NVPTX::BFE_S64rii;
4947 Opc = NVPTX::BFE_U64rii;
4950 // We cannot handle this type
// Build the machine node at N's location with N's result value list.
4959 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4964 // SelectDirectAddr - Match a direct address for DAG.
4965 // A direct address could be a globaladdress or externalsymbol.
//
// N is the candidate node; Address is the output operand.
// Three node shapes are tested, in this order:
//   * ISD::TargetGlobalAddress or ISD::TargetExternalSymbol;
//   * NVPTXISD::Wrapper, for which Address receives the wrapper's operand 0;
//   * ISD::INTRINSIC_WO_CHAIN carrying Intrinsic::nvvm_ptr_gen_to_param
//     whose operand 1 is an NVPTXISD::MoveParam: the match is retried on
//     the MoveParam's operand 0 and that result is returned.
// NOTE(review): what Address is set to in the first case, and the value
// returned when nothing matches, are not visible in this listing.
4966 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4967 // Return true if TGA or ES.
4968 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4969 N.getOpcode() == ISD::TargetExternalSymbol) {
// A wrapper node contributes the operand it wraps.
4973 if (N.getOpcode() == NVPTXISD::Wrapper) {
4974 Address = N.getOperand(0);
4977 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
// Operand 0 of the intrinsic node holds the intrinsic ID as a constant.
4978 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4979 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4980 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4981 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match an address of the shape
// (add <direct address>, <constant>).
//
// Addr is the address node to inspect. When it is an ISD::ADD whose
// operand 1 is a ConstantSDNode and whose operand 0 is accepted by
// SelectDirectAddr, Base is filled in by SelectDirectAddr and Offset becomes
// a target constant of type mvt holding the constant's zero-extended value.
// OpNode is not read by any line visible here.
// NOTE(review): the return statements are not visible in this listing.
4987 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4988 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4989 if (Addr.getOpcode() == ISD::ADD) {
4990 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4991 SDValue base = Addr.getOperand(0);
// SelectDirectAddr writes the matched address straight into Base.
4992 if (SelectDirectAddr(base, Base)) {
4993 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - Forwards to SelectADDRsi_imp with MVT::i32 as the type
// used for the offset constant; returns that call's result.
5002 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5003 SDValue &Base, SDValue &Offset) {
5004 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - Forwards to SelectADDRsi_imp with MVT::i64 as the type
// used for the offset constant; returns that call's result.
5008 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5009 SDValue &Base, SDValue &Offset) {
5010 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match an address as a (Base, Offset) operand pair.
//
// Shapes handled by the lines visible here:
//   * a FrameIndexSDNode: Base is the matching target frame index and
//     Offset is the constant 0, both of type mvt;
//   * ISD::TargetExternalSymbol / ISD::TargetGlobalAddress: rejected
//     (returns false; the original comment calls these "direct calls");
//   * an ISD::ADD whose operand 1 is a ConstantSDNode: Base is operand 0
//     (converted to a target frame index when it is a FrameIndexSDNode) and
//     Offset is the constant's zero-extended value as a target constant.
// OpNode is not read by any line visible here.
// NOTE(review): several returns and an `else` are presumably among the
// entries missing from this listing (the embedded numbering skips) --
// confirm against the complete file.
5014 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5015 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5016 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5017 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5018 Offset = CurDAG->getTargetConstant(0, mvt);
5021 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5022 Addr.getOpcode() == ISD::TargetGlobalAddress)
5023 return false; // direct calls.
5025 if (Addr.getOpcode() == ISD::ADD) {
// Addr is a by-value parameter and is also passed as the output argument
// here, so this call may overwrite the local Addr.
// NOTE(review): the action taken when this test succeeds is not visible.
5026 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5029 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5030 if (FrameIndexSDNode *FIN =
5031 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5032 // Constant offset from frame ref.
5033 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5035 Base = Addr.getOperand(0);
5036 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRri - Forwards to SelectADDRri_imp with MVT::i32 as the type
// used for the base/offset operands; returns that call's result.
5044 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5045 SDValue &Base, SDValue &Offset) {
5046 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - Forwards to SelectADDRri_imp with MVT::i64 as the type
// used for the base/offset operands; returns that call's result.
5050 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5051 SDValue &Base, SDValue &Offset) {
5052 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Test the address space of the IR value attached
// to a memory node.
//
// N is inspected as a MemSDNode or a MemIntrinsicSDNode; the IR value of
// its memory operand is fetched into Src. When that value has a pointer
// type, the result is whether the pointer's address space equals spN.
// For spN == 0, a memory operand that carries a pseudo value is
// special-cased before Src is read.
// NOTE(review): the outcome of that special case, any null check on Src,
// and the final fall-through return are not visible in this listing --
// confirm against the complete file.
5055 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5056 unsigned int spN) const {
5057 const Value *Src = nullptr;
5058 // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
5059 // the classof() for MemSDNode does not include MemIntrinsicSDNode
5060 // (See SelectionDAGNodes.h). So we need to check for both.
5061 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5062 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5064 Src = mN->getMemOperand()->getValue();
5065 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
5066 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5068 Src = mN->getMemOperand()->getValue();
// Only pointer-typed values carry an address space to compare against spN.
5072 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5073 return (PT->getAddressSpace() == spN);
5077 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5078 /// inline asm expressions.
5079 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5080 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
5082 switch (ConstraintCode) {
5086 if (SelectDirectAddr(Op, Op0)) {
5087 OutOps.push_back(Op0);
5088 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5091 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5092 OutOps.push_back(Op0);
5093 OutOps.push_back(Op1);