1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line options that tune NVPTX floating-point instruction
// selection. The accepted values of each option are spelled out in its
// description string.

// -nvptx-prec-divf32: integer level selecting the f32 division flavour (0, 1
// or 2, see the description). Consulted by getDivF32Level().
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
// -nvptx-prec-sqrtf32: chooses between sqrt.approx and sqrt.rn for f32 square
// roots. Consulted by usePrecSqrtF32().
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// -nvptx-f32ftz: requests flushing of f32 subnormals to sign-preserving zero.
// Consulted by useF32FTZ().
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
44 /// createNVPTXISelDag - This pass converts a legalized DAG into an
45 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// \param TM       target machine forwarded to the selector's constructor.
/// \param OptLevel optimization level forwarded to the selector's constructor.
/// \returns a newly allocated NVPTXDAGToDAGISel as a raw pointer (ownership is
///          presumably taken over by the pass manager — TODO confirm).
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Constructs the selector, forwarding the target machine and optimization
/// level to the SelectionDAGISel base class.
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel) {
// doMulWide is switched on for every optimization level above 0
// (level 0 is presumably CodeGenOpt::None — confirm).
54 doMulWide = (OptLevel > 0);
/// Records the NVPTX subtarget of \p MF in the Subtarget member, then delegates
/// to SelectionDAGISel::runOnMachineFunction and returns its result.
57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
// The helpers in this file query Subtarget (e.g. is64Bit()), so it has to be
// set before the base class starts selecting.
58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59 return SelectionDAGISel::runOnMachineFunction(MF);
/// Returns the precision level to use for f32 division.
///
/// An explicit -nvptx-prec-divf32 on the command line takes priority. Without
/// it, the decision depends on TM.Options.UnsafeFPMath.
/// NOTE(review): the result is presumably encoded like the option itself
/// (0 = div.approx, 1 = div.full, 2 = IEEE div.rnd) — confirm.
62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63 if (UsePrecDivF32.getNumOccurrences() > 0) {
64 // If nvptx-prec-divf32=N is used on the command-line, always honor it
67 // Otherwise, use div.approx if fast math is enabled
68 if (TM.Options.UnsafeFPMath)
/// Decides between sqrt.approx and sqrt.rn for f32 square roots.
///
/// When -nvptx-prec-sqrtf32 was given on the command line its value is
/// returned directly. Otherwise the decision depends on
/// TM.Options.UnsafeFPMath.
/// NOTE(review): a true result presumably means "use the precise sqrt.rn"
/// (matching option value 1) — confirm.
75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78 return UsePrecSqrtF32;
80 // Otherwise, use sqrt.approx if fast math is enabled
81 if (TM.Options.UnsafeFPMath)
/// Decides whether flush-to-zero behaviour is requested for f32.
///
/// An explicit -nvptx-f32ftz on the command line takes priority. Otherwise the
/// current function's "nvptx-f32ftz" attribute is consulted; it enables the
/// mode only when its string value is exactly "true".
/// NOTE(review): the result when neither source is present is not visible
/// here — confirm.
88 bool NVPTXDAGToDAGISel::useF32FTZ() const {
89 if (FtzEnabled.getNumOccurrences() > 0) {
90 // If nvptx-f32ftz is used on the command-line, always honor it
93 const Function *F = MF->getFunction();
94 // Otherwise, check for an nvptx-f32ftz attribute on the function
95 if (F->hasFnAttribute("nvptx-f32ftz"))
96 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
/// Forwards the query to the subtarget's NVPTXTargetLowering::allowFMA for the
/// current machine function and optimization level, and returns its answer.
102 bool NVPTXDAGToDAGISel::allowFMA() const {
103 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
104 return TL->allowFMA(*MF, OptLevel);
107 /// Select - Select instructions not customized! Used for
108 /// expanded, promoted and normal instructions.
///
/// Nodes that already carry a machine opcode are treated as selected and
/// yield nullptr. Otherwise the node's opcode picks one of the custom
/// Select* helpers of this class; the groups are labelled below.
109 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
111 if (N->isMachineOpcode()) {
113 return nullptr; // Already selected.
116 SDNode *ResNode = nullptr;
117 switch (N->getOpcode()) {
// Loads and stores have dedicated selectors.
119 ResNode = SelectLoad(N);
122 ResNode = SelectStore(N);
// Vector loads (v2/v4).
124 case NVPTXISD::LoadV2:
125 case NVPTXISD::LoadV4:
126 ResNode = SelectLoadVector(N);
// LDG/LDU vector nodes.
128 case NVPTXISD::LDGV2:
129 case NVPTXISD::LDGV4:
130 case NVPTXISD::LDUV2:
131 case NVPTXISD::LDUV4:
132 ResNode = SelectLDGLDU(N);
// Vector stores (v2/v4).
134 case NVPTXISD::StoreV2:
135 case NVPTXISD::StoreV4:
136 ResNode = SelectStoreVector(N);
// Parameter loads.
138 case NVPTXISD::LoadParam:
139 case NVPTXISD::LoadParamV2:
140 case NVPTXISD::LoadParamV4:
141 ResNode = SelectLoadParam(N);
// Return-value stores.
143 case NVPTXISD::StoreRetval:
144 case NVPTXISD::StoreRetvalV2:
145 case NVPTXISD::StoreRetvalV4:
146 ResNode = SelectStoreRetval(N);
// Parameter stores.
148 case NVPTXISD::StoreParam:
149 case NVPTXISD::StoreParamV2:
150 case NVPTXISD::StoreParamV4:
151 case NVPTXISD::StoreParamS32:
152 case NVPTXISD::StoreParamU32:
153 ResNode = SelectStoreParam(N);
// Intrinsics, without and with a chain operand.
155 case ISD::INTRINSIC_WO_CHAIN:
156 ResNode = SelectIntrinsicNoChain(N);
158 case ISD::INTRINSIC_W_CHAIN:
159 ResNode = SelectIntrinsicChain(N);
// Texture fetch (Tex*) and Tld4* nodes, in both the plain and the "Unified"
// opcode families, all go through SelectTextureIntrinsic.
161 case NVPTXISD::Tex1DFloatS32:
162 case NVPTXISD::Tex1DFloatFloat:
163 case NVPTXISD::Tex1DFloatFloatLevel:
164 case NVPTXISD::Tex1DFloatFloatGrad:
165 case NVPTXISD::Tex1DS32S32:
166 case NVPTXISD::Tex1DS32Float:
167 case NVPTXISD::Tex1DS32FloatLevel:
168 case NVPTXISD::Tex1DS32FloatGrad:
169 case NVPTXISD::Tex1DU32S32:
170 case NVPTXISD::Tex1DU32Float:
171 case NVPTXISD::Tex1DU32FloatLevel:
172 case NVPTXISD::Tex1DU32FloatGrad:
173 case NVPTXISD::Tex1DArrayFloatS32:
174 case NVPTXISD::Tex1DArrayFloatFloat:
175 case NVPTXISD::Tex1DArrayFloatFloatLevel:
176 case NVPTXISD::Tex1DArrayFloatFloatGrad:
177 case NVPTXISD::Tex1DArrayS32S32:
178 case NVPTXISD::Tex1DArrayS32Float:
179 case NVPTXISD::Tex1DArrayS32FloatLevel:
180 case NVPTXISD::Tex1DArrayS32FloatGrad:
181 case NVPTXISD::Tex1DArrayU32S32:
182 case NVPTXISD::Tex1DArrayU32Float:
183 case NVPTXISD::Tex1DArrayU32FloatLevel:
184 case NVPTXISD::Tex1DArrayU32FloatGrad:
185 case NVPTXISD::Tex2DFloatS32:
186 case NVPTXISD::Tex2DFloatFloat:
187 case NVPTXISD::Tex2DFloatFloatLevel:
188 case NVPTXISD::Tex2DFloatFloatGrad:
189 case NVPTXISD::Tex2DS32S32:
190 case NVPTXISD::Tex2DS32Float:
191 case NVPTXISD::Tex2DS32FloatLevel:
192 case NVPTXISD::Tex2DS32FloatGrad:
193 case NVPTXISD::Tex2DU32S32:
194 case NVPTXISD::Tex2DU32Float:
195 case NVPTXISD::Tex2DU32FloatLevel:
196 case NVPTXISD::Tex2DU32FloatGrad:
197 case NVPTXISD::Tex2DArrayFloatS32:
198 case NVPTXISD::Tex2DArrayFloatFloat:
199 case NVPTXISD::Tex2DArrayFloatFloatLevel:
200 case NVPTXISD::Tex2DArrayFloatFloatGrad:
201 case NVPTXISD::Tex2DArrayS32S32:
202 case NVPTXISD::Tex2DArrayS32Float:
203 case NVPTXISD::Tex2DArrayS32FloatLevel:
204 case NVPTXISD::Tex2DArrayS32FloatGrad:
205 case NVPTXISD::Tex2DArrayU32S32:
206 case NVPTXISD::Tex2DArrayU32Float:
207 case NVPTXISD::Tex2DArrayU32FloatLevel:
208 case NVPTXISD::Tex2DArrayU32FloatGrad:
209 case NVPTXISD::Tex3DFloatS32:
210 case NVPTXISD::Tex3DFloatFloat:
211 case NVPTXISD::Tex3DFloatFloatLevel:
212 case NVPTXISD::Tex3DFloatFloatGrad:
213 case NVPTXISD::Tex3DS32S32:
214 case NVPTXISD::Tex3DS32Float:
215 case NVPTXISD::Tex3DS32FloatLevel:
216 case NVPTXISD::Tex3DS32FloatGrad:
217 case NVPTXISD::Tex3DU32S32:
218 case NVPTXISD::Tex3DU32Float:
219 case NVPTXISD::Tex3DU32FloatLevel:
220 case NVPTXISD::Tex3DU32FloatGrad:
221 case NVPTXISD::TexCubeFloatFloat:
222 case NVPTXISD::TexCubeFloatFloatLevel:
223 case NVPTXISD::TexCubeS32Float:
224 case NVPTXISD::TexCubeS32FloatLevel:
225 case NVPTXISD::TexCubeU32Float:
226 case NVPTXISD::TexCubeU32FloatLevel:
227 case NVPTXISD::TexCubeArrayFloatFloat:
228 case NVPTXISD::TexCubeArrayFloatFloatLevel:
229 case NVPTXISD::TexCubeArrayS32Float:
230 case NVPTXISD::TexCubeArrayS32FloatLevel:
231 case NVPTXISD::TexCubeArrayU32Float:
232 case NVPTXISD::TexCubeArrayU32FloatLevel:
233 case NVPTXISD::Tld4R2DFloatFloat:
234 case NVPTXISD::Tld4G2DFloatFloat:
235 case NVPTXISD::Tld4B2DFloatFloat:
236 case NVPTXISD::Tld4A2DFloatFloat:
237 case NVPTXISD::Tld4R2DS64Float:
238 case NVPTXISD::Tld4G2DS64Float:
239 case NVPTXISD::Tld4B2DS64Float:
240 case NVPTXISD::Tld4A2DS64Float:
241 case NVPTXISD::Tld4R2DU64Float:
242 case NVPTXISD::Tld4G2DU64Float:
243 case NVPTXISD::Tld4B2DU64Float:
244 case NVPTXISD::Tld4A2DU64Float:
245 case NVPTXISD::TexUnified1DFloatS32:
246 case NVPTXISD::TexUnified1DFloatFloat:
247 case NVPTXISD::TexUnified1DFloatFloatLevel:
248 case NVPTXISD::TexUnified1DFloatFloatGrad:
249 case NVPTXISD::TexUnified1DS32S32:
250 case NVPTXISD::TexUnified1DS32Float:
251 case NVPTXISD::TexUnified1DS32FloatLevel:
252 case NVPTXISD::TexUnified1DS32FloatGrad:
253 case NVPTXISD::TexUnified1DU32S32:
254 case NVPTXISD::TexUnified1DU32Float:
255 case NVPTXISD::TexUnified1DU32FloatLevel:
256 case NVPTXISD::TexUnified1DU32FloatGrad:
257 case NVPTXISD::TexUnified1DArrayFloatS32:
258 case NVPTXISD::TexUnified1DArrayFloatFloat:
259 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
260 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
261 case NVPTXISD::TexUnified1DArrayS32S32:
262 case NVPTXISD::TexUnified1DArrayS32Float:
263 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
264 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
265 case NVPTXISD::TexUnified1DArrayU32S32:
266 case NVPTXISD::TexUnified1DArrayU32Float:
267 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
268 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
269 case NVPTXISD::TexUnified2DFloatS32:
270 case NVPTXISD::TexUnified2DFloatFloat:
271 case NVPTXISD::TexUnified2DFloatFloatLevel:
272 case NVPTXISD::TexUnified2DFloatFloatGrad:
273 case NVPTXISD::TexUnified2DS32S32:
274 case NVPTXISD::TexUnified2DS32Float:
275 case NVPTXISD::TexUnified2DS32FloatLevel:
276 case NVPTXISD::TexUnified2DS32FloatGrad:
277 case NVPTXISD::TexUnified2DU32S32:
278 case NVPTXISD::TexUnified2DU32Float:
279 case NVPTXISD::TexUnified2DU32FloatLevel:
280 case NVPTXISD::TexUnified2DU32FloatGrad:
281 case NVPTXISD::TexUnified2DArrayFloatS32:
282 case NVPTXISD::TexUnified2DArrayFloatFloat:
283 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
284 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
285 case NVPTXISD::TexUnified2DArrayS32S32:
286 case NVPTXISD::TexUnified2DArrayS32Float:
287 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
288 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
289 case NVPTXISD::TexUnified2DArrayU32S32:
290 case NVPTXISD::TexUnified2DArrayU32Float:
291 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
292 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
293 case NVPTXISD::TexUnified3DFloatS32:
294 case NVPTXISD::TexUnified3DFloatFloat:
295 case NVPTXISD::TexUnified3DFloatFloatLevel:
296 case NVPTXISD::TexUnified3DFloatFloatGrad:
297 case NVPTXISD::TexUnified3DS32S32:
298 case NVPTXISD::TexUnified3DS32Float:
299 case NVPTXISD::TexUnified3DS32FloatLevel:
300 case NVPTXISD::TexUnified3DS32FloatGrad:
301 case NVPTXISD::TexUnified3DU32S32:
302 case NVPTXISD::TexUnified3DU32Float:
303 case NVPTXISD::TexUnified3DU32FloatLevel:
304 case NVPTXISD::TexUnified3DU32FloatGrad:
305 case NVPTXISD::TexUnifiedCubeFloatFloat:
306 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
307 case NVPTXISD::TexUnifiedCubeS32Float:
308 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
309 case NVPTXISD::TexUnifiedCubeU32Float:
310 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
311 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
312 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
313 case NVPTXISD::TexUnifiedCubeArrayS32Float:
314 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
315 case NVPTXISD::TexUnifiedCubeArrayU32Float:
316 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
317 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
319 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
320 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
321 case NVPTXISD::Tld4UnifiedR2DS64Float:
322 case NVPTXISD::Tld4UnifiedG2DS64Float:
323 case NVPTXISD::Tld4UnifiedB2DS64Float:
324 case NVPTXISD::Tld4UnifiedA2DS64Float:
325 case NVPTXISD::Tld4UnifiedR2DU64Float:
326 case NVPTXISD::Tld4UnifiedG2DU64Float:
327 case NVPTXISD::Tld4UnifiedB2DU64Float:
328 case NVPTXISD::Tld4UnifiedA2DU64Float:
329 ResNode = SelectTextureIntrinsic(N);
// Surface load (Suld*) nodes in their Clamp, Trap and Zero flavours all go
// through SelectSurfaceIntrinsic.
331 case NVPTXISD::Suld1DI8Clamp:
332 case NVPTXISD::Suld1DI16Clamp:
333 case NVPTXISD::Suld1DI32Clamp:
334 case NVPTXISD::Suld1DI64Clamp:
335 case NVPTXISD::Suld1DV2I8Clamp:
336 case NVPTXISD::Suld1DV2I16Clamp:
337 case NVPTXISD::Suld1DV2I32Clamp:
338 case NVPTXISD::Suld1DV2I64Clamp:
339 case NVPTXISD::Suld1DV4I8Clamp:
340 case NVPTXISD::Suld1DV4I16Clamp:
341 case NVPTXISD::Suld1DV4I32Clamp:
342 case NVPTXISD::Suld1DArrayI8Clamp:
343 case NVPTXISD::Suld1DArrayI16Clamp:
344 case NVPTXISD::Suld1DArrayI32Clamp:
345 case NVPTXISD::Suld1DArrayI64Clamp:
346 case NVPTXISD::Suld1DArrayV2I8Clamp:
347 case NVPTXISD::Suld1DArrayV2I16Clamp:
348 case NVPTXISD::Suld1DArrayV2I32Clamp:
349 case NVPTXISD::Suld1DArrayV2I64Clamp:
350 case NVPTXISD::Suld1DArrayV4I8Clamp:
351 case NVPTXISD::Suld1DArrayV4I16Clamp:
352 case NVPTXISD::Suld1DArrayV4I32Clamp:
353 case NVPTXISD::Suld2DI8Clamp:
354 case NVPTXISD::Suld2DI16Clamp:
355 case NVPTXISD::Suld2DI32Clamp:
356 case NVPTXISD::Suld2DI64Clamp:
357 case NVPTXISD::Suld2DV2I8Clamp:
358 case NVPTXISD::Suld2DV2I16Clamp:
359 case NVPTXISD::Suld2DV2I32Clamp:
360 case NVPTXISD::Suld2DV2I64Clamp:
361 case NVPTXISD::Suld2DV4I8Clamp:
362 case NVPTXISD::Suld2DV4I16Clamp:
363 case NVPTXISD::Suld2DV4I32Clamp:
364 case NVPTXISD::Suld2DArrayI8Clamp:
365 case NVPTXISD::Suld2DArrayI16Clamp:
366 case NVPTXISD::Suld2DArrayI32Clamp:
367 case NVPTXISD::Suld2DArrayI64Clamp:
368 case NVPTXISD::Suld2DArrayV2I8Clamp:
369 case NVPTXISD::Suld2DArrayV2I16Clamp:
370 case NVPTXISD::Suld2DArrayV2I32Clamp:
371 case NVPTXISD::Suld2DArrayV2I64Clamp:
372 case NVPTXISD::Suld2DArrayV4I8Clamp:
373 case NVPTXISD::Suld2DArrayV4I16Clamp:
374 case NVPTXISD::Suld2DArrayV4I32Clamp:
375 case NVPTXISD::Suld3DI8Clamp:
376 case NVPTXISD::Suld3DI16Clamp:
377 case NVPTXISD::Suld3DI32Clamp:
378 case NVPTXISD::Suld3DI64Clamp:
379 case NVPTXISD::Suld3DV2I8Clamp:
380 case NVPTXISD::Suld3DV2I16Clamp:
381 case NVPTXISD::Suld3DV2I32Clamp:
382 case NVPTXISD::Suld3DV2I64Clamp:
383 case NVPTXISD::Suld3DV4I8Clamp:
384 case NVPTXISD::Suld3DV4I16Clamp:
385 case NVPTXISD::Suld3DV4I32Clamp:
386 case NVPTXISD::Suld1DI8Trap:
387 case NVPTXISD::Suld1DI16Trap:
388 case NVPTXISD::Suld1DI32Trap:
389 case NVPTXISD::Suld1DI64Trap:
390 case NVPTXISD::Suld1DV2I8Trap:
391 case NVPTXISD::Suld1DV2I16Trap:
392 case NVPTXISD::Suld1DV2I32Trap:
393 case NVPTXISD::Suld1DV2I64Trap:
394 case NVPTXISD::Suld1DV4I8Trap:
395 case NVPTXISD::Suld1DV4I16Trap:
396 case NVPTXISD::Suld1DV4I32Trap:
397 case NVPTXISD::Suld1DArrayI8Trap:
398 case NVPTXISD::Suld1DArrayI16Trap:
399 case NVPTXISD::Suld1DArrayI32Trap:
400 case NVPTXISD::Suld1DArrayI64Trap:
401 case NVPTXISD::Suld1DArrayV2I8Trap:
402 case NVPTXISD::Suld1DArrayV2I16Trap:
403 case NVPTXISD::Suld1DArrayV2I32Trap:
404 case NVPTXISD::Suld1DArrayV2I64Trap:
405 case NVPTXISD::Suld1DArrayV4I8Trap:
406 case NVPTXISD::Suld1DArrayV4I16Trap:
407 case NVPTXISD::Suld1DArrayV4I32Trap:
408 case NVPTXISD::Suld2DI8Trap:
409 case NVPTXISD::Suld2DI16Trap:
410 case NVPTXISD::Suld2DI32Trap:
411 case NVPTXISD::Suld2DI64Trap:
412 case NVPTXISD::Suld2DV2I8Trap:
413 case NVPTXISD::Suld2DV2I16Trap:
414 case NVPTXISD::Suld2DV2I32Trap:
415 case NVPTXISD::Suld2DV2I64Trap:
416 case NVPTXISD::Suld2DV4I8Trap:
417 case NVPTXISD::Suld2DV4I16Trap:
418 case NVPTXISD::Suld2DV4I32Trap:
419 case NVPTXISD::Suld2DArrayI8Trap:
420 case NVPTXISD::Suld2DArrayI16Trap:
421 case NVPTXISD::Suld2DArrayI32Trap:
422 case NVPTXISD::Suld2DArrayI64Trap:
423 case NVPTXISD::Suld2DArrayV2I8Trap:
424 case NVPTXISD::Suld2DArrayV2I16Trap:
425 case NVPTXISD::Suld2DArrayV2I32Trap:
426 case NVPTXISD::Suld2DArrayV2I64Trap:
427 case NVPTXISD::Suld2DArrayV4I8Trap:
428 case NVPTXISD::Suld2DArrayV4I16Trap:
429 case NVPTXISD::Suld2DArrayV4I32Trap:
430 case NVPTXISD::Suld3DI8Trap:
431 case NVPTXISD::Suld3DI16Trap:
432 case NVPTXISD::Suld3DI32Trap:
433 case NVPTXISD::Suld3DI64Trap:
434 case NVPTXISD::Suld3DV2I8Trap:
435 case NVPTXISD::Suld3DV2I16Trap:
436 case NVPTXISD::Suld3DV2I32Trap:
437 case NVPTXISD::Suld3DV2I64Trap:
438 case NVPTXISD::Suld3DV4I8Trap:
439 case NVPTXISD::Suld3DV4I16Trap:
440 case NVPTXISD::Suld3DV4I32Trap:
441 case NVPTXISD::Suld1DI8Zero:
442 case NVPTXISD::Suld1DI16Zero:
443 case NVPTXISD::Suld1DI32Zero:
444 case NVPTXISD::Suld1DI64Zero:
445 case NVPTXISD::Suld1DV2I8Zero:
446 case NVPTXISD::Suld1DV2I16Zero:
447 case NVPTXISD::Suld1DV2I32Zero:
448 case NVPTXISD::Suld1DV2I64Zero:
449 case NVPTXISD::Suld1DV4I8Zero:
450 case NVPTXISD::Suld1DV4I16Zero:
451 case NVPTXISD::Suld1DV4I32Zero:
452 case NVPTXISD::Suld1DArrayI8Zero:
453 case NVPTXISD::Suld1DArrayI16Zero:
454 case NVPTXISD::Suld1DArrayI32Zero:
455 case NVPTXISD::Suld1DArrayI64Zero:
456 case NVPTXISD::Suld1DArrayV2I8Zero:
457 case NVPTXISD::Suld1DArrayV2I16Zero:
458 case NVPTXISD::Suld1DArrayV2I32Zero:
459 case NVPTXISD::Suld1DArrayV2I64Zero:
460 case NVPTXISD::Suld1DArrayV4I8Zero:
461 case NVPTXISD::Suld1DArrayV4I16Zero:
462 case NVPTXISD::Suld1DArrayV4I32Zero:
463 case NVPTXISD::Suld2DI8Zero:
464 case NVPTXISD::Suld2DI16Zero:
465 case NVPTXISD::Suld2DI32Zero:
466 case NVPTXISD::Suld2DI64Zero:
467 case NVPTXISD::Suld2DV2I8Zero:
468 case NVPTXISD::Suld2DV2I16Zero:
469 case NVPTXISD::Suld2DV2I32Zero:
470 case NVPTXISD::Suld2DV2I64Zero:
471 case NVPTXISD::Suld2DV4I8Zero:
472 case NVPTXISD::Suld2DV4I16Zero:
473 case NVPTXISD::Suld2DV4I32Zero:
474 case NVPTXISD::Suld2DArrayI8Zero:
475 case NVPTXISD::Suld2DArrayI16Zero:
476 case NVPTXISD::Suld2DArrayI32Zero:
477 case NVPTXISD::Suld2DArrayI64Zero:
478 case NVPTXISD::Suld2DArrayV2I8Zero:
479 case NVPTXISD::Suld2DArrayV2I16Zero:
480 case NVPTXISD::Suld2DArrayV2I32Zero:
481 case NVPTXISD::Suld2DArrayV2I64Zero:
482 case NVPTXISD::Suld2DArrayV4I8Zero:
483 case NVPTXISD::Suld2DArrayV4I16Zero:
484 case NVPTXISD::Suld2DArrayV4I32Zero:
485 case NVPTXISD::Suld3DI8Zero:
486 case NVPTXISD::Suld3DI16Zero:
487 case NVPTXISD::Suld3DI32Zero:
488 case NVPTXISD::Suld3DI64Zero:
489 case NVPTXISD::Suld3DV2I8Zero:
490 case NVPTXISD::Suld3DV2I16Zero:
491 case NVPTXISD::Suld3DV2I32Zero:
492 case NVPTXISD::Suld3DV2I64Zero:
493 case NVPTXISD::Suld3DV4I8Zero:
494 case NVPTXISD::Suld3DV4I16Zero:
495 case NVPTXISD::Suld3DV4I32Zero:
496 ResNode = SelectSurfaceIntrinsic(N);
502 ResNode = SelectBFE(N);
// Address-space casts.
504 case ISD::ADDRSPACECAST:
505 ResNode = SelectAddrSpaceCast(N);
// Hand the node to SelectCode.
// NOTE(review): presumably reached only when no custom selector above
// produced a node — confirm.
512 return SelectCode(N);
/// Custom selection for intrinsic nodes that carry a chain.
///
/// The intrinsic ID is read from operand 1 of the node. The nvvm ldg/ldu
/// global intrinsics (f, i and p variants) are routed to SelectLDGLDU.
/// NOTE(review): the result for any other intrinsic is not visible here —
/// confirm.
515 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
516 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
520 case Intrinsic::nvvm_ldg_global_f:
521 case Intrinsic::nvvm_ldg_global_i:
522 case Intrinsic::nvvm_ldg_global_p:
523 case Intrinsic::nvvm_ldu_global_f:
524 case Intrinsic::nvvm_ldu_global_i:
525 case Intrinsic::nvvm_ldu_global_p:
526 return SelectLDGLDU(N);
/// Maps the address space of a memory node to the matching
/// NVPTX::PTXLdStInstCode address-space code.
///
/// The address space is read from the pointer type of the IR value attached to
/// the node's memory operand. GENERIC is the fallback result.
530 static unsigned int getCodeAddrSpace(MemSDNode *N) {
531 const Value *Src = N->getMemOperand()->getValue();
// presumably taken when no IR value is attached to the memory operand —
// confirm.
534 return NVPTX::PTXLdStInstCode::GENERIC;
536 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
537 switch (PT->getAddressSpace()) {
538 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
539 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
540 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
541 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
542 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
543 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fallback for values that did not yield one of the address spaces above.
547 return NVPTX::PTXLdStInstCode::GENERIC;
/// Custom selection for intrinsic nodes without a chain.
///
/// The intrinsic ID is read from operand 0 of the node.
/// nvvm_texsurf_handle_internal is routed to SelectTexSurfHandle.
/// NOTE(review): the result for any other intrinsic is not visible here —
/// confirm.
550 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
551 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
555 case Intrinsic::nvvm_texsurf_handle_internal:
556 return SelectTexSurfHandle(N);
/// Selects a texture/surface handle intrinsic into an NVPTX::texsurf_handles
/// machine node whose result type is i64.
///
/// Operand 1 of \p N is expected to be a wrapper node; its operand 0 is read
/// as GlobalVal.
560 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
561 // Op 0 is the intrinsic ID
562 SDValue Wrapper = N->getOperand(1);
563 SDValue GlobalVal = Wrapper.getOperand(0);
564 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
/// Selects an ISD::ADDRSPACECAST node into one of the cvta machine opcodes.
///
/// Casts into the generic address space use the cvta_<space>_yes opcodes.
/// Casts out of it use cvta_to_<space>_yes. In both directions the _64
/// variant is chosen when the subtarget is 64-bit. Unsupported address spaces
/// and casts between two non-generic spaces are reported with
/// report_fatal_error.
///
/// \returns the new machine node, with the result type of \p N and the cast
///          source as its only operand.
568 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
569 SDValue Src = N->getOperand(0);
570 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
571 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
572 unsigned DstAddrSpace = CastN->getDestAddressSpace();
574 assert(SrcAddrSpace != DstAddrSpace &&
575 "addrspacecast must be between different address spaces");
577 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
578 // Specific to generic
580 switch (SrcAddrSpace) {
581 default: report_fatal_error("Bad address space in addrspacecast");
582 case ADDRESS_SPACE_GLOBAL:
583 Opc = Subtarget->is64Bit() ? NVPTX::cvta_global_yes_64
584 : NVPTX::cvta_global_yes;
586 case ADDRESS_SPACE_SHARED:
587 Opc = Subtarget->is64Bit() ? NVPTX::cvta_shared_yes_64
588 : NVPTX::cvta_shared_yes;
590 case ADDRESS_SPACE_CONST:
591 Opc = Subtarget->is64Bit() ? NVPTX::cvta_const_yes_64
592 : NVPTX::cvta_const_yes;
594 case ADDRESS_SPACE_LOCAL:
595 Opc = Subtarget->is64Bit() ? NVPTX::cvta_local_yes_64
596 : NVPTX::cvta_local_yes;
599 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
601 // Generic to specific
// The literal 0 is presumably ADDRESS_SPACE_GENERIC — confirm.
602 if (SrcAddrSpace != 0)
603 report_fatal_error("Cannot cast between two non-generic address spaces");
605 switch (DstAddrSpace) {
606 default: report_fatal_error("Bad address space in addrspacecast");
607 case ADDRESS_SPACE_GLOBAL:
608 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_global_yes_64
609 : NVPTX::cvta_to_global_yes;
611 case ADDRESS_SPACE_SHARED:
612 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_shared_yes_64
613 : NVPTX::cvta_to_shared_yes;
615 case ADDRESS_SPACE_CONST:
616 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_const_yes_64
617 : NVPTX::cvta_to_const_yes;
619 case ADDRESS_SPACE_LOCAL:
620 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_local_yes_64
621 : NVPTX::cvta_to_local_yes;
624 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
/// Custom selection for load nodes.
///
/// Gathers the immediate operands that describe the access (volatile flag,
/// address-space code, vector kind, source type and source width). It then
/// picks an LD_<type>_<mode> opcode according to which addressing pattern
/// matches the address operand:
///   - SelectDirectAddr            -> *_avar
///   - SelectADDRsi / SelectADDRsi64 -> *_asi
///   - SelectADDRri / SelectADDRri64 -> *_ari (with _64 variants)
///   - otherwise                   -> *_areg (with _64 variants)
/// The memory operand of the original node is attached to the new node.
628 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
630 LoadSDNode *LD = cast<LoadSDNode>(N);
631 EVT LoadedVT = LD->getMemoryVT();
632 SDNode *NVPTXLD = nullptr;
634 // do not support pre/post inc/dec
638 if (!LoadedVT.isSimple())
641 // Address Space Setting
642 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
645 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lets the GENERIC code through.
646 bool isVolatile = LD->isVolatile();
647 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
648 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
649 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
653 MVT SimpleVT = LoadedVT.getSimpleVT();
654 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
655 if (SimpleVT.isVector()) {
656 unsigned num = SimpleVT.getVectorNumElements();
658 vecType = NVPTX::PTXLdStInstCode::V2;
660 vecType = NVPTX::PTXLdStInstCode::V4;
665 // Type Setting: fromType + fromTypeWidth
667 // Sign : ISD::SEXTLOAD
668 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
670 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
671 MVT ScalarVT = SimpleVT.getScalarType();
672 // Read at least 8 bits (predicates are stored as 8-bit values)
673 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
674 unsigned int fromType;
675 if ((LD->getExtensionType() == ISD::SEXTLOAD))
676 fromType = NVPTX::PTXLdStInstCode::Signed;
677 else if (ScalarVT.isFloatingPoint())
678 fromType = NVPTX::PTXLdStInstCode::Float;
680 fromType = NVPTX::PTXLdStInstCode::Unsigned;
682 // Create the machine instruction DAG
683 SDValue Chain = N->getOperand(0);
684 SDValue N1 = N->getOperand(1);
686 SDValue Offset, Base;
688 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Mode 1: SelectDirectAddr matched; the *_avar opcodes take the matched Addr.
690 if (SelectDirectAddr(N1, Addr)) {
693 Opcode = NVPTX::LD_i8_avar;
696 Opcode = NVPTX::LD_i16_avar;
699 Opcode = NVPTX::LD_i32_avar;
702 Opcode = NVPTX::LD_i64_avar;
705 Opcode = NVPTX::LD_f32_avar;
708 Opcode = NVPTX::LD_f64_avar;
713 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
714 getI32Imm(vecType), getI32Imm(fromType),
715 getI32Imm(fromTypeWidth), Addr, Chain };
716 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
717 } else if (Subtarget->is64Bit()
718 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
719 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
// Mode 2: the "si" pattern matched; the *_asi opcodes take Base and Offset.
722 Opcode = NVPTX::LD_i8_asi;
725 Opcode = NVPTX::LD_i16_asi;
728 Opcode = NVPTX::LD_i32_asi;
731 Opcode = NVPTX::LD_i64_asi;
734 Opcode = NVPTX::LD_f32_asi;
737 Opcode = NVPTX::LD_f64_asi;
742 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
743 getI32Imm(vecType), getI32Imm(fromType),
744 getI32Imm(fromTypeWidth), Base, Offset, Chain };
745 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
746 } else if (Subtarget->is64Bit()
747 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
748 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
// Mode 3: the "ri" pattern matched; *_ari opcodes, with separate _64 variants
// for 64-bit subtargets.
749 if (Subtarget->is64Bit()) {
752 Opcode = NVPTX::LD_i8_ari_64;
755 Opcode = NVPTX::LD_i16_ari_64;
758 Opcode = NVPTX::LD_i32_ari_64;
761 Opcode = NVPTX::LD_i64_ari_64;
764 Opcode = NVPTX::LD_f32_ari_64;
767 Opcode = NVPTX::LD_f64_ari_64;
775 Opcode = NVPTX::LD_i8_ari;
778 Opcode = NVPTX::LD_i16_ari;
781 Opcode = NVPTX::LD_i32_ari;
784 Opcode = NVPTX::LD_i64_ari;
787 Opcode = NVPTX::LD_f32_ari;
790 Opcode = NVPTX::LD_f64_ari;
796 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
797 getI32Imm(vecType), getI32Imm(fromType),
798 getI32Imm(fromTypeWidth), Base, Offset, Chain };
799 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Mode 4 (presumably the final else branch — confirm): the *_areg opcodes
// take the address operand N1 unchanged.
801 if (Subtarget->is64Bit()) {
804 Opcode = NVPTX::LD_i8_areg_64;
807 Opcode = NVPTX::LD_i16_areg_64;
810 Opcode = NVPTX::LD_i32_areg_64;
813 Opcode = NVPTX::LD_i64_areg_64;
816 Opcode = NVPTX::LD_f32_areg_64;
819 Opcode = NVPTX::LD_f64_areg_64;
827 Opcode = NVPTX::LD_i8_areg;
830 Opcode = NVPTX::LD_i16_areg;
833 Opcode = NVPTX::LD_i32_areg;
836 Opcode = NVPTX::LD_i64_areg;
839 Opcode = NVPTX::LD_f32_areg;
842 Opcode = NVPTX::LD_f64_areg;
848 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
849 getI32Imm(vecType), getI32Imm(fromType),
850 getI32Imm(fromTypeWidth), N1, Chain };
851 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Carry the original node's memory operand over to the new machine node.
855 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
856 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
857 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Custom selection for NVPTXISD::LoadV2 / NVPTXISD::LoadV4 nodes.
///
/// Works like the scalar load selector. It gathers the immediate operands
/// describing the access, then picks an LDV_<type>_<v2|v4>_<mode> opcode
/// according to the node opcode, the element type and which addressing
/// pattern matches the address operand (avar, asi, ari or areg, the last two
/// with _64 variants). The extension kind is not taken from a LoadSDNode; it
/// is read from the node's last operand. The new node reuses the value-type
/// list of \p N and receives the original memory operand.
863 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
865 SDValue Chain = N->getOperand(0);
866 SDValue Op1 = N->getOperand(1);
867 SDValue Addr, Offset, Base;
871 MemSDNode *MemSD = cast<MemSDNode>(N);
872 EVT LoadedVT = MemSD->getMemoryVT();
874 if (!LoadedVT.isSimple())
877 // Address Space Setting
878 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
881 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lets the GENERIC code through.
882 bool IsVolatile = MemSD->isVolatile();
883 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
884 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
885 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
889 MVT SimpleVT = LoadedVT.getSimpleVT();
891 // Type Setting: fromType + fromTypeWidth
893 // Sign : ISD::SEXTLOAD
894 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
896 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
897 MVT ScalarVT = SimpleVT.getScalarType();
898 // Read at least 8 bits (predicates are stored as 8-bit values)
899 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
900 unsigned int FromType;
901 // The last operand holds the original LoadSDNode::getExtensionType() value
902 unsigned ExtensionType = cast<ConstantSDNode>(
903 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
904 if (ExtensionType == ISD::SEXTLOAD)
905 FromType = NVPTX::PTXLdStInstCode::Signed;
906 else if (ScalarVT.isFloatingPoint())
907 FromType = NVPTX::PTXLdStInstCode::Float;
909 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// The vector kind follows directly from the node opcode.
913 switch (N->getOpcode()) {
914 case NVPTXISD::LoadV2:
915 VecType = NVPTX::PTXLdStInstCode::V2;
917 case NVPTXISD::LoadV4:
918 VecType = NVPTX::PTXLdStInstCode::V4;
924 EVT EltVT = N->getValueType(0);
// Mode 1: SelectDirectAddr matched; the *_avar opcodes take the matched Addr.
926 if (SelectDirectAddr(Op1, Addr)) {
927 switch (N->getOpcode()) {
930 case NVPTXISD::LoadV2:
931 switch (EltVT.getSimpleVT().SimpleTy) {
935 Opcode = NVPTX::LDV_i8_v2_avar;
938 Opcode = NVPTX::LDV_i16_v2_avar;
941 Opcode = NVPTX::LDV_i32_v2_avar;
944 Opcode = NVPTX::LDV_i64_v2_avar;
947 Opcode = NVPTX::LDV_f32_v2_avar;
950 Opcode = NVPTX::LDV_f64_v2_avar;
954 case NVPTXISD::LoadV4:
955 switch (EltVT.getSimpleVT().SimpleTy) {
959 Opcode = NVPTX::LDV_i8_v4_avar;
962 Opcode = NVPTX::LDV_i16_v4_avar;
965 Opcode = NVPTX::LDV_i32_v4_avar;
968 Opcode = NVPTX::LDV_f32_v4_avar;
974 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
975 getI32Imm(VecType), getI32Imm(FromType),
976 getI32Imm(FromTypeWidth), Addr, Chain };
977 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
978 } else if (Subtarget->is64Bit()
979 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
980 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
// Mode 2: the "si" pattern matched; the *_asi opcodes take Base and Offset.
981 switch (N->getOpcode()) {
984 case NVPTXISD::LoadV2:
985 switch (EltVT.getSimpleVT().SimpleTy) {
989 Opcode = NVPTX::LDV_i8_v2_asi;
992 Opcode = NVPTX::LDV_i16_v2_asi;
995 Opcode = NVPTX::LDV_i32_v2_asi;
998 Opcode = NVPTX::LDV_i64_v2_asi;
1001 Opcode = NVPTX::LDV_f32_v2_asi;
1004 Opcode = NVPTX::LDV_f64_v2_asi;
1008 case NVPTXISD::LoadV4:
1009 switch (EltVT.getSimpleVT().SimpleTy) {
1013 Opcode = NVPTX::LDV_i8_v4_asi;
1016 Opcode = NVPTX::LDV_i16_v4_asi;
1019 Opcode = NVPTX::LDV_i32_v4_asi;
1022 Opcode = NVPTX::LDV_f32_v4_asi;
1028 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1029 getI32Imm(VecType), getI32Imm(FromType),
1030 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1031 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1032 } else if (Subtarget->is64Bit()
1033 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1034 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
// Mode 3: the "ri" pattern matched; *_ari opcodes, with separate _64 variants
// for 64-bit subtargets.
1035 if (Subtarget->is64Bit()) {
1036 switch (N->getOpcode()) {
1039 case NVPTXISD::LoadV2:
1040 switch (EltVT.getSimpleVT().SimpleTy) {
1044 Opcode = NVPTX::LDV_i8_v2_ari_64;
1047 Opcode = NVPTX::LDV_i16_v2_ari_64;
1050 Opcode = NVPTX::LDV_i32_v2_ari_64;
1053 Opcode = NVPTX::LDV_i64_v2_ari_64;
1056 Opcode = NVPTX::LDV_f32_v2_ari_64;
1059 Opcode = NVPTX::LDV_f64_v2_ari_64;
1063 case NVPTXISD::LoadV4:
1064 switch (EltVT.getSimpleVT().SimpleTy) {
1068 Opcode = NVPTX::LDV_i8_v4_ari_64;
1071 Opcode = NVPTX::LDV_i16_v4_ari_64;
1074 Opcode = NVPTX::LDV_i32_v4_ari_64;
1077 Opcode = NVPTX::LDV_f32_v4_ari_64;
1083 switch (N->getOpcode()) {
1086 case NVPTXISD::LoadV2:
1087 switch (EltVT.getSimpleVT().SimpleTy) {
1091 Opcode = NVPTX::LDV_i8_v2_ari;
1094 Opcode = NVPTX::LDV_i16_v2_ari;
1097 Opcode = NVPTX::LDV_i32_v2_ari;
1100 Opcode = NVPTX::LDV_i64_v2_ari;
1103 Opcode = NVPTX::LDV_f32_v2_ari;
1106 Opcode = NVPTX::LDV_f64_v2_ari;
1110 case NVPTXISD::LoadV4:
1111 switch (EltVT.getSimpleVT().SimpleTy) {
1115 Opcode = NVPTX::LDV_i8_v4_ari;
1118 Opcode = NVPTX::LDV_i16_v4_ari;
1121 Opcode = NVPTX::LDV_i32_v4_ari;
1124 Opcode = NVPTX::LDV_f32_v4_ari;
1131 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1132 getI32Imm(VecType), getI32Imm(FromType),
1133 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1135 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Mode 4 (presumably the final else branch — confirm): the *_areg opcodes
// take the address operand Op1 unchanged.
1137 if (Subtarget->is64Bit()) {
1138 switch (N->getOpcode()) {
1141 case NVPTXISD::LoadV2:
1142 switch (EltVT.getSimpleVT().SimpleTy) {
1146 Opcode = NVPTX::LDV_i8_v2_areg_64;
1149 Opcode = NVPTX::LDV_i16_v2_areg_64;
1152 Opcode = NVPTX::LDV_i32_v2_areg_64;
1155 Opcode = NVPTX::LDV_i64_v2_areg_64;
1158 Opcode = NVPTX::LDV_f32_v2_areg_64;
1161 Opcode = NVPTX::LDV_f64_v2_areg_64;
1165 case NVPTXISD::LoadV4:
1166 switch (EltVT.getSimpleVT().SimpleTy) {
1170 Opcode = NVPTX::LDV_i8_v4_areg_64;
1173 Opcode = NVPTX::LDV_i16_v4_areg_64;
1176 Opcode = NVPTX::LDV_i32_v4_areg_64;
1179 Opcode = NVPTX::LDV_f32_v4_areg_64;
1185 switch (N->getOpcode()) {
1188 case NVPTXISD::LoadV2:
1189 switch (EltVT.getSimpleVT().SimpleTy) {
1193 Opcode = NVPTX::LDV_i8_v2_areg;
1196 Opcode = NVPTX::LDV_i16_v2_areg;
1199 Opcode = NVPTX::LDV_i32_v2_areg;
1202 Opcode = NVPTX::LDV_i64_v2_areg;
1205 Opcode = NVPTX::LDV_f32_v2_areg;
1208 Opcode = NVPTX::LDV_f64_v2_areg;
1212 case NVPTXISD::LoadV4:
1213 switch (EltVT.getSimpleVT().SimpleTy) {
1217 Opcode = NVPTX::LDV_i8_v4_areg;
1220 Opcode = NVPTX::LDV_i16_v4_areg;
1223 Opcode = NVPTX::LDV_i32_v4_areg;
1226 Opcode = NVPTX::LDV_f32_v4_areg;
1233 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1234 getI32Imm(VecType), getI32Imm(FromType),
1235 getI32Imm(FromTypeWidth), Op1, Chain };
1236 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the new machine node.
1239 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1240 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1241 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a machine node for an LDG/LDU load. Two node shapes are handled:
/// the nvvm_ldg_global_* / nvvm_ldu_global_* intrinsics (which arrive as
/// ISD::INTRINSIC_W_CHAIN) and the NVPTXISD::LDGV2/LDUV2/LDGV4/LDUV4 vector
/// nodes. The opcode is picked from the node kind, the element type of the
/// memory VT, and whichever addressing form matches the address operand.
/// The original node's memory operand is attached to the selected node.
1246 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1248 SDValue Chain = N->getOperand(0);
1253 // If this is an LDG/LDU intrinsic, the address is the third operand. If it is an
1254 // LDG/LDU SD node (from custom vector handling), then it is the second operand.
1255 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1256 Op1 = N->getOperand(2);
1257 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 carries the intrinsic ID; the ldg and ldu intrinsic families
// are listed as two separate case groups.
1258 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1262 case Intrinsic::nvvm_ldg_global_f:
1263 case Intrinsic::nvvm_ldg_global_i:
1264 case Intrinsic::nvvm_ldg_global_p:
1267 case Intrinsic::nvvm_ldu_global_f:
1268 case Intrinsic::nvvm_ldu_global_i:
1269 case Intrinsic::nvvm_ldu_global_p:
1274 Op1 = N->getOperand(1);
1275 Mem = cast<MemSDNode>(N);
1281 SDValue Base, Offset, Addr;
// Opcode tables below are keyed on the scalar element type, so a vector
// memory VT is reduced to its element type first.
1283 EVT EltVT = Mem->getMemoryVT();
1284 if (EltVT.isVector()) {
1285 EltVT = EltVT.getVectorElementType();
// Addressing form 1: SelectDirectAddr matched Op1. The *avar opcodes are
// used and Addr is the only address operand.
1288 if (SelectDirectAddr(Op1, Addr)) {
1289 switch (N->getOpcode()) {
// Scalar intrinsic form: one opcode table for LDG, then one for LDU.
1292 case ISD::INTRINSIC_W_CHAIN:
1294 switch (EltVT.getSimpleVT().SimpleTy) {
1298 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1301 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1304 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1307 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1310 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1313 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1317 switch (EltVT.getSimpleVT().SimpleTy) {
1321 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1324 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1327 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1330 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1333 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1336 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1341 case NVPTXISD::LDGV2:
1342 switch (EltVT.getSimpleVT().SimpleTy) {
1346 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1349 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1352 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1355 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1358 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1361 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1365 case NVPTXISD::LDUV2:
1366 switch (EltVT.getSimpleVT().SimpleTy) {
1370 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1373 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1376 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1379 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1382 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1385 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1389 case NVPTXISD::LDGV4:
1390 switch (EltVT.getSimpleVT().SimpleTy) {
1394 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1397 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1400 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1403 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1407 case NVPTXISD::LDUV4:
1408 switch (EltVT.getSimpleVT().SimpleTy) {
1412 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1415 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1418 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1421 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1427 SDValue Ops[] = { Addr, Chain };
1428 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing form 2: Op1 matched as Base + Offset via SelectADDRri64 or
// SelectADDRri (chosen by Subtarget->is64Bit()). The 64-bit tables use the
// *ari64 opcodes; the other tables use *ari (scalar) and *ari32 (vector).
1429 } else if (Subtarget->is64Bit()
1430 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1431 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1432 if (Subtarget->is64Bit()) {
1433 switch (N->getOpcode()) {
1436 case ISD::INTRINSIC_W_CHAIN:
1438 switch (EltVT.getSimpleVT().SimpleTy) {
1442 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1445 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1448 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1451 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1454 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1457 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1461 switch (EltVT.getSimpleVT().SimpleTy) {
1465 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1468 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1471 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1474 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1477 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1480 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1485 case NVPTXISD::LDGV2:
1486 switch (EltVT.getSimpleVT().SimpleTy) {
1490 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1493 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1496 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1499 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1502 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1505 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1509 case NVPTXISD::LDUV2:
1510 switch (EltVT.getSimpleVT().SimpleTy) {
1514 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1517 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1520 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1523 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1526 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1529 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1533 case NVPTXISD::LDGV4:
1534 switch (EltVT.getSimpleVT().SimpleTy) {
1538 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1541 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1544 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1547 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1551 case NVPTXISD::LDUV4:
1552 switch (EltVT.getSimpleVT().SimpleTy) {
1556 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1559 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1562 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1565 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1571 switch (N->getOpcode()) {
1574 case ISD::INTRINSIC_W_CHAIN:
1576 switch (EltVT.getSimpleVT().SimpleTy) {
1580 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1583 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1586 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1589 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1592 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1595 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1599 switch (EltVT.getSimpleVT().SimpleTy) {
1603 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1606 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1609 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1612 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1615 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1618 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1623 case NVPTXISD::LDGV2:
1624 switch (EltVT.getSimpleVT().SimpleTy) {
1628 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1631 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1634 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1637 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1640 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1643 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1647 case NVPTXISD::LDUV2:
1648 switch (EltVT.getSimpleVT().SimpleTy) {
1652 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1655 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1658 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1661 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1664 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1667 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1671 case NVPTXISD::LDGV4:
1672 switch (EltVT.getSimpleVT().SimpleTy) {
1676 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1679 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1682 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1685 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1689 case NVPTXISD::LDUV4:
1690 switch (EltVT.getSimpleVT().SimpleTy) {
1694 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1697 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1700 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1703 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1710 SDValue Ops[] = { Base, Offset, Chain };
1712 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing form 3: Op1 itself is passed as the address operand. The
// 64-bit tables use the *areg64 opcodes; the other tables use *areg
// (scalar) and *areg32 (vector).
1714 if (Subtarget->is64Bit()) {
1715 switch (N->getOpcode()) {
1718 case ISD::INTRINSIC_W_CHAIN:
1720 switch (EltVT.getSimpleVT().SimpleTy) {
1724 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1727 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1730 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1733 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1736 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1739 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1743 switch (EltVT.getSimpleVT().SimpleTy) {
1747 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1750 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1753 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1756 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1759 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1762 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1767 case NVPTXISD::LDGV2:
1768 switch (EltVT.getSimpleVT().SimpleTy) {
1772 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1775 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1778 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1781 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1784 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1787 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1791 case NVPTXISD::LDUV2:
1792 switch (EltVT.getSimpleVT().SimpleTy) {
1796 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1799 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1802 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1805 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1808 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1811 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1815 case NVPTXISD::LDGV4:
1816 switch (EltVT.getSimpleVT().SimpleTy) {
1820 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1823 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1826 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1829 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1833 case NVPTXISD::LDUV4:
1834 switch (EltVT.getSimpleVT().SimpleTy) {
1838 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1841 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1844 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1847 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1853 switch (N->getOpcode()) {
1856 case ISD::INTRINSIC_W_CHAIN:
1858 switch (EltVT.getSimpleVT().SimpleTy) {
1862 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1868 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1871 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1874 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1877 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1881 switch (EltVT.getSimpleVT().SimpleTy) {
1885 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1891 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1894 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1897 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1900 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1905 case NVPTXISD::LDGV2:
1906 switch (EltVT.getSimpleVT().SimpleTy) {
1910 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1913 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1916 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1919 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1922 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1925 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1929 case NVPTXISD::LDUV2:
1930 switch (EltVT.getSimpleVT().SimpleTy) {
1934 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1937 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1940 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1943 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1946 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1949 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1953 case NVPTXISD::LDGV4:
1954 switch (EltVT.getSimpleVT().SimpleTy) {
1958 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1961 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1964 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1967 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1971 case NVPTXISD::LDUV4:
1972 switch (EltVT.getSimpleVT().SimpleTy) {
1976 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1979 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1982 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1985 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1992 SDValue Ops[] = { Op1, Chain };
1993 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the memory operand of the original node over to the new machine node.
1996 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1997 MemRefs0[0] = Mem->getMemOperand();
1998 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::ST_* machine node for a StoreSDNode. The opcode family
/// (_avar, _asi, _ari[_64], _areg[_64]) follows the addressing form that
/// matches the address operand N2. The volatile flag, address space, vector
/// kind, and destination type and width are passed as i32 immediates after
/// the stored value N1. The node's memory operand is attached to the result.
2003 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2005 StoreSDNode *ST = cast<StoreSDNode>(N);
2006 EVT StoreVT = ST->getMemoryVT();
2007 SDNode *NVPTXST = nullptr;
2009 // do not support pre/post inc/dec
2010 if (ST->isIndexed())
// Only simple memory value types are handled; getSimpleVT() is used below.
2013 if (!StoreVT.isSimple())
2016 // Address Space Setting
2017 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2020 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lists GENERIC alongside GLOBAL and
// SHARED, which the comment above does not mention - confirm the intent.
2021 bool isVolatile = ST->isVolatile();
2022 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2023 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2024 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector kind: Scalar by default; vector memory types switch to V2 or V4
// based on the element count (presumably 2 and 4 respectively).
2028 MVT SimpleVT = StoreVT.getSimpleVT();
2029 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2030 if (SimpleVT.isVector()) {
2031 unsigned num = SimpleVT.getVectorNumElements();
2033 vecType = NVPTX::PTXLdStInstCode::V2;
2035 vecType = NVPTX::PTXLdStInstCode::V4;
2040 // Type Setting: toType + toTypeWidth
2041 // - for integer type, always use 'u'
2043 MVT ScalarVT = SimpleVT.getScalarType();
2044 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2045 unsigned int toType;
2046 if (ScalarVT.isFloatingPoint())
2047 toType = NVPTX::PTXLdStInstCode::Float;
2049 toType = NVPTX::PTXLdStInstCode::Unsigned;
2051 // Create the machine instruction DAG
// Operand layout of the incoming store: 0 = chain, 1 = value, 2 = address.
2052 SDValue Chain = N->getOperand(0);
2053 SDValue N1 = N->getOperand(1);
2054 SDValue N2 = N->getOperand(2);
2056 SDValue Offset, Base;
// SourceVT is the simple type of the value being stored (result 0 of N1's
// node), as opposed to the memory type used for toType/toTypeWidth above.
2058 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Addressing form 1: SelectDirectAddr matched N2; ST_*_avar with Addr.
2060 if (SelectDirectAddr(N2, Addr)) {
2063 Opcode = NVPTX::ST_i8_avar;
2066 Opcode = NVPTX::ST_i16_avar;
2069 Opcode = NVPTX::ST_i32_avar;
2072 Opcode = NVPTX::ST_i64_avar;
2075 Opcode = NVPTX::ST_f32_avar;
2078 Opcode = NVPTX::ST_f64_avar;
2083 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2084 getI32Imm(vecType), getI32Imm(toType),
2085 getI32Imm(toTypeWidth), Addr, Chain };
2086 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 2: Base + Offset matched by SelectADDRsi64/SelectADDRsi;
// the ST_*_asi opcodes are shared by both subtarget widths.
2087 } else if (Subtarget->is64Bit()
2088 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2089 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2092 Opcode = NVPTX::ST_i8_asi;
2095 Opcode = NVPTX::ST_i16_asi;
2098 Opcode = NVPTX::ST_i32_asi;
2101 Opcode = NVPTX::ST_i64_asi;
2104 Opcode = NVPTX::ST_f32_asi;
2107 Opcode = NVPTX::ST_f64_asi;
2112 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2113 getI32Imm(vecType), getI32Imm(toType),
2114 getI32Imm(toTypeWidth), Base, Offset, Chain };
2115 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 3: Base + Offset matched by SelectADDRri64/SelectADDRri;
// the is64Bit() check picks the ST_*_ari_64 table over the ST_*_ari table.
2116 } else if (Subtarget->is64Bit()
2117 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2118 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2119 if (Subtarget->is64Bit()) {
2122 Opcode = NVPTX::ST_i8_ari_64;
2125 Opcode = NVPTX::ST_i16_ari_64;
2128 Opcode = NVPTX::ST_i32_ari_64;
2131 Opcode = NVPTX::ST_i64_ari_64;
2134 Opcode = NVPTX::ST_f32_ari_64;
2137 Opcode = NVPTX::ST_f64_ari_64;
2145 Opcode = NVPTX::ST_i8_ari;
2148 Opcode = NVPTX::ST_i16_ari;
2151 Opcode = NVPTX::ST_i32_ari;
2154 Opcode = NVPTX::ST_i64_ari;
2157 Opcode = NVPTX::ST_f32_ari;
2160 Opcode = NVPTX::ST_f64_ari;
2166 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2167 getI32Imm(vecType), getI32Imm(toType),
2168 getI32Imm(toTypeWidth), Base, Offset, Chain };
2169 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 4: N2 itself is the address operand; the is64Bit() check
// picks the ST_*_areg_64 table over the ST_*_areg table.
2171 if (Subtarget->is64Bit()) {
2174 Opcode = NVPTX::ST_i8_areg_64;
2177 Opcode = NVPTX::ST_i16_areg_64;
2180 Opcode = NVPTX::ST_i32_areg_64;
2183 Opcode = NVPTX::ST_i64_areg_64;
2186 Opcode = NVPTX::ST_f32_areg_64;
2189 Opcode = NVPTX::ST_f64_areg_64;
2197 Opcode = NVPTX::ST_i8_areg;
2200 Opcode = NVPTX::ST_i16_areg;
2203 Opcode = NVPTX::ST_i32_areg;
2206 Opcode = NVPTX::ST_i64_areg;
2209 Opcode = NVPTX::ST_f32_areg;
2212 Opcode = NVPTX::ST_f64_areg;
2218 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2219 getI32Imm(vecType), getI32Imm(toType),
2220 getI32Imm(toTypeWidth), N2, Chain };
2221 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the memory operand of the original store over to the new node.
2225 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2226 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2227 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::STV_* machine node for NVPTXISD::StoreV2 / StoreV4.
/// The operand list is built as: the 2 or 4 stored values, five i32
/// immediates (volatile, address space, vector kind, type, type width), the
/// address operand(s) for the matched addressing form, then the chain.
/// Stores into the CONSTANT address space are rejected with a fatal error.
2233 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2234 SDValue Chain = N->getOperand(0);
2235 SDValue Op1 = N->getOperand(1);
2236 SDValue Addr, Offset, Base;
// EltVT is the type of the first stored value and keys the opcode tables;
// StoreVT is the memory type and supplies ToType/ToTypeWidth.
2240 EVT EltVT = Op1.getValueType();
2241 MemSDNode *MemSD = cast<MemSDNode>(N);
2242 EVT StoreVT = MemSD->getMemoryVT();
2244 // Address Space Setting
2245 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2247 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2248 report_fatal_error("Cannot store to pointer that points to constant "
2253 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lists GENERIC alongside GLOBAL and
// SHARED, which the comment above does not mention - confirm the intent.
2254 bool IsVolatile = MemSD->isVolatile();
2255 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2256 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2257 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2260 // Type Setting: toType + toTypeWidth
2261 // - for integer type, always use 'u'
2262 assert(StoreVT.isSimple() && "Store value is not simple");
2263 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2264 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2266 if (ScalarVT.isFloatingPoint())
2267 ToType = NVPTX::PTXLdStInstCode::Float;
2269 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2271 SmallVector<SDValue, 12> StOps;
// Gather the stored values first. StoreV2 has values in operands 1-2 and the
// address in operand 3; StoreV4 has values in operands 1-4 and the address
// in operand 5.
2275 switch (N->getOpcode()) {
2276 case NVPTXISD::StoreV2:
2277 VecType = NVPTX::PTXLdStInstCode::V2;
2278 StOps.push_back(N->getOperand(1));
2279 StOps.push_back(N->getOperand(2));
2280 N2 = N->getOperand(3);
2282 case NVPTXISD::StoreV4:
2283 VecType = NVPTX::PTXLdStInstCode::V4;
2284 StOps.push_back(N->getOperand(1));
2285 StOps.push_back(N->getOperand(2));
2286 StOps.push_back(N->getOperand(3));
2287 StOps.push_back(N->getOperand(4));
2288 N2 = N->getOperand(5);
// Immediate operands shared by every addressing form.
2294 StOps.push_back(getI32Imm(IsVolatile));
2295 StOps.push_back(getI32Imm(CodeAddrSpace));
2296 StOps.push_back(getI32Imm(VecType));
2297 StOps.push_back(getI32Imm(ToType));
2298 StOps.push_back(getI32Imm(ToTypeWidth));
// Addressing form 1: SelectDirectAddr matched N2; STV_*_avar with Addr.
2300 if (SelectDirectAddr(N2, Addr)) {
2301 switch (N->getOpcode()) {
2304 case NVPTXISD::StoreV2:
2305 switch (EltVT.getSimpleVT().SimpleTy) {
2309 Opcode = NVPTX::STV_i8_v2_avar;
2312 Opcode = NVPTX::STV_i16_v2_avar;
2315 Opcode = NVPTX::STV_i32_v2_avar;
2318 Opcode = NVPTX::STV_i64_v2_avar;
2321 Opcode = NVPTX::STV_f32_v2_avar;
2324 Opcode = NVPTX::STV_f64_v2_avar;
2328 case NVPTXISD::StoreV4:
2329 switch (EltVT.getSimpleVT().SimpleTy) {
2333 Opcode = NVPTX::STV_i8_v4_avar;
2336 Opcode = NVPTX::STV_i16_v4_avar;
2339 Opcode = NVPTX::STV_i32_v4_avar;
2342 Opcode = NVPTX::STV_f32_v4_avar;
2347 StOps.push_back(Addr);
// Addressing form 2: Base + Offset matched by SelectADDRsi64/SelectADDRsi;
// the STV_*_asi opcodes are shared by both subtarget widths.
2348 } else if (Subtarget->is64Bit()
2349 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2350 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2351 switch (N->getOpcode()) {
2354 case NVPTXISD::StoreV2:
2355 switch (EltVT.getSimpleVT().SimpleTy) {
2359 Opcode = NVPTX::STV_i8_v2_asi;
2362 Opcode = NVPTX::STV_i16_v2_asi;
2365 Opcode = NVPTX::STV_i32_v2_asi;
2368 Opcode = NVPTX::STV_i64_v2_asi;
2371 Opcode = NVPTX::STV_f32_v2_asi;
2374 Opcode = NVPTX::STV_f64_v2_asi;
2378 case NVPTXISD::StoreV4:
2379 switch (EltVT.getSimpleVT().SimpleTy) {
2383 Opcode = NVPTX::STV_i8_v4_asi;
2386 Opcode = NVPTX::STV_i16_v4_asi;
2389 Opcode = NVPTX::STV_i32_v4_asi;
2392 Opcode = NVPTX::STV_f32_v4_asi;
2397 StOps.push_back(Base);
2398 StOps.push_back(Offset);
// Addressing form 3: Base + Offset matched by SelectADDRri64/SelectADDRri;
// the is64Bit() check picks the STV_*_ari_64 tables over STV_*_ari.
2399 } else if (Subtarget->is64Bit()
2400 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2401 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2402 if (Subtarget->is64Bit()) {
2403 switch (N->getOpcode()) {
2406 case NVPTXISD::StoreV2:
2407 switch (EltVT.getSimpleVT().SimpleTy) {
2411 Opcode = NVPTX::STV_i8_v2_ari_64;
2414 Opcode = NVPTX::STV_i16_v2_ari_64;
2417 Opcode = NVPTX::STV_i32_v2_ari_64;
2420 Opcode = NVPTX::STV_i64_v2_ari_64;
2423 Opcode = NVPTX::STV_f32_v2_ari_64;
2426 Opcode = NVPTX::STV_f64_v2_ari_64;
2430 case NVPTXISD::StoreV4:
2431 switch (EltVT.getSimpleVT().SimpleTy) {
2435 Opcode = NVPTX::STV_i8_v4_ari_64;
2438 Opcode = NVPTX::STV_i16_v4_ari_64;
2441 Opcode = NVPTX::STV_i32_v4_ari_64;
2444 Opcode = NVPTX::STV_f32_v4_ari_64;
2450 switch (N->getOpcode()) {
2453 case NVPTXISD::StoreV2:
2454 switch (EltVT.getSimpleVT().SimpleTy) {
2458 Opcode = NVPTX::STV_i8_v2_ari;
2461 Opcode = NVPTX::STV_i16_v2_ari;
2464 Opcode = NVPTX::STV_i32_v2_ari;
2467 Opcode = NVPTX::STV_i64_v2_ari;
2470 Opcode = NVPTX::STV_f32_v2_ari;
2473 Opcode = NVPTX::STV_f64_v2_ari;
2477 case NVPTXISD::StoreV4:
2478 switch (EltVT.getSimpleVT().SimpleTy) {
2482 Opcode = NVPTX::STV_i8_v4_ari;
2485 Opcode = NVPTX::STV_i16_v4_ari;
2488 Opcode = NVPTX::STV_i32_v4_ari;
2491 Opcode = NVPTX::STV_f32_v4_ari;
2497 StOps.push_back(Base);
2498 StOps.push_back(Offset);
// Addressing form 4: N2 itself is the address operand; the is64Bit() check
// picks the STV_*_areg_64 tables over STV_*_areg.
2500 if (Subtarget->is64Bit()) {
2501 switch (N->getOpcode()) {
2504 case NVPTXISD::StoreV2:
2505 switch (EltVT.getSimpleVT().SimpleTy) {
2509 Opcode = NVPTX::STV_i8_v2_areg_64;
2512 Opcode = NVPTX::STV_i16_v2_areg_64;
2515 Opcode = NVPTX::STV_i32_v2_areg_64;
2518 Opcode = NVPTX::STV_i64_v2_areg_64;
2521 Opcode = NVPTX::STV_f32_v2_areg_64;
2524 Opcode = NVPTX::STV_f64_v2_areg_64;
2528 case NVPTXISD::StoreV4:
2529 switch (EltVT.getSimpleVT().SimpleTy) {
2533 Opcode = NVPTX::STV_i8_v4_areg_64;
2536 Opcode = NVPTX::STV_i16_v4_areg_64;
2539 Opcode = NVPTX::STV_i32_v4_areg_64;
2542 Opcode = NVPTX::STV_f32_v4_areg_64;
2548 switch (N->getOpcode()) {
2551 case NVPTXISD::StoreV2:
2552 switch (EltVT.getSimpleVT().SimpleTy) {
2556 Opcode = NVPTX::STV_i8_v2_areg;
2559 Opcode = NVPTX::STV_i16_v2_areg;
2562 Opcode = NVPTX::STV_i32_v2_areg;
2565 Opcode = NVPTX::STV_i64_v2_areg;
2568 Opcode = NVPTX::STV_f32_v2_areg;
2571 Opcode = NVPTX::STV_f64_v2_areg;
2575 case NVPTXISD::StoreV4:
2576 switch (EltVT.getSimpleVT().SimpleTy) {
2580 Opcode = NVPTX::STV_i8_v4_areg;
2583 Opcode = NVPTX::STV_i16_v4_areg;
2586 Opcode = NVPTX::STV_i32_v4_areg;
2589 Opcode = NVPTX::STV_f32_v4_areg;
2595 StOps.push_back(N2);
// The chain is always the final operand.
2598 StOps.push_back(Chain);
2600 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the memory operand of the original node over to the new node.
2602 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2603 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2604 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::LoadParamMem* machine node for NVPTXISD::LoadParam,
/// LoadParamV2, or LoadParamV4. The opcode is keyed on the memory VT; the
/// result list is one, two, or four values of the node's first result type
/// followed by MVT::Other and MVT::Glue. Operands of the new node are the
/// offset (as an i32 target constant), the chain, and Flag.
2609 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Incoming operands read here: 0 = chain, 2 = offset, 3 = Flag.
2610 SDValue Chain = Node->getOperand(0);
2611 SDValue Offset = Node->getOperand(2);
2612 SDValue Flag = Node->getOperand(3);
2614 MemSDNode *Mem = cast<MemSDNode>(Node);
2617 switch (Node->getOpcode()) {
2620 case NVPTXISD::LoadParam:
2623 case NVPTXISD::LoadParamV2:
2626 case NVPTXISD::LoadParamV4:
2631 EVT EltVT = Node->getValueType(0);
2632 EVT MemVT = Mem->getMemoryVT();
// Scalar opcode table. Two entries map to the I8 opcode (presumably the i1
// and i8 memory types - TODO confirm against the case labels).
2640 switch (MemVT.getSimpleVT().SimpleTy) {
2644 Opc = NVPTX::LoadParamMemI8;
2647 Opc = NVPTX::LoadParamMemI8;
2650 Opc = NVPTX::LoadParamMemI16;
2653 Opc = NVPTX::LoadParamMemI32;
2656 Opc = NVPTX::LoadParamMemI64;
2659 Opc = NVPTX::LoadParamMemF32;
2662 Opc = NVPTX::LoadParamMemF64;
// Two-element opcode table.
2667 switch (MemVT.getSimpleVT().SimpleTy) {
2671 Opc = NVPTX::LoadParamMemV2I8;
2674 Opc = NVPTX::LoadParamMemV2I8;
2677 Opc = NVPTX::LoadParamMemV2I16;
2680 Opc = NVPTX::LoadParamMemV2I32;
2683 Opc = NVPTX::LoadParamMemV2I64;
2686 Opc = NVPTX::LoadParamMemV2F32;
2689 Opc = NVPTX::LoadParamMemV2F64;
// Four-element opcode table.
2694 switch (MemVT.getSimpleVT().SimpleTy) {
2698 Opc = NVPTX::LoadParamMemV4I8;
2701 Opc = NVPTX::LoadParamMemV4I8;
2704 Opc = NVPTX::LoadParamMemV4I16;
2707 Opc = NVPTX::LoadParamMemV4I32;
2710 Opc = NVPTX::LoadParamMemV4F32;
// Result types: EltVT repeated per element, then the chain and glue results.
2718 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2719 } else if (VecSize == 2) {
2720 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2722 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2723 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a ConstantSDNode; it is re-emitted as an i32
// target constant.
2726 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2728 SmallVector<SDValue, 2> Ops;
2729 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2730 Ops.push_back(Chain);
2731 Ops.push_back(Flag);
2734 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// Select an NVPTX::StoreRetval* machine node for NVPTXISD::StoreRetval,
/// StoreRetvalV2, or StoreRetvalV4. Operands of the new node are the stored
/// value(s), the offset as an i32 target constant, and the chain. The opcode
/// is keyed on the memory VT, and the original node's memory operand is
/// attached to the result.
2738 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Incoming operands: 0 = chain, 1 = constant offset, 2.. = stored values.
2740 SDValue Chain = N->getOperand(0);
2741 SDValue Offset = N->getOperand(1);
2742 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2743 MemSDNode *Mem = cast<MemSDNode>(N);
2745 // How many elements do we have?
2746 unsigned NumElts = 1;
2747 switch (N->getOpcode()) {
2750 case NVPTXISD::StoreRetval:
2753 case NVPTXISD::StoreRetvalV2:
2756 case NVPTXISD::StoreRetvalV4:
2761 // Build vector of operands
2762 SmallVector<SDValue, 6> Ops;
2763 for (unsigned i = 0; i < NumElts; ++i)
2764 Ops.push_back(N->getOperand(i + 2));
2765 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2766 Ops.push_back(Chain);
2768 // Determine target opcode
2769 // If we have an i1, use an 8-bit store. The lowering code in
2770 // NVPTXISelLowering will have already emitted an upcast.
2771 unsigned Opcode = 0;
// Scalar opcode table, keyed on the memory VT.
2776 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2780 Opcode = NVPTX::StoreRetvalI8;
2783 Opcode = NVPTX::StoreRetvalI8;
2786 Opcode = NVPTX::StoreRetvalI16;
2789 Opcode = NVPTX::StoreRetvalI32;
2792 Opcode = NVPTX::StoreRetvalI64;
2795 Opcode = NVPTX::StoreRetvalF32;
2798 Opcode = NVPTX::StoreRetvalF64;
// Two-element opcode table.
2803 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2807 Opcode = NVPTX::StoreRetvalV2I8;
2810 Opcode = NVPTX::StoreRetvalV2I8;
2813 Opcode = NVPTX::StoreRetvalV2I16;
2816 Opcode = NVPTX::StoreRetvalV2I32;
2819 Opcode = NVPTX::StoreRetvalV2I64;
2822 Opcode = NVPTX::StoreRetvalV2F32;
2825 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element opcode table.
2830 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2834 Opcode = NVPTX::StoreRetvalV4I8;
2837 Opcode = NVPTX::StoreRetvalV4I8;
2840 Opcode = NVPTX::StoreRetvalV4I16;
2843 Opcode = NVPTX::StoreRetvalV4I32;
2846 Opcode = NVPTX::StoreRetvalV4F32;
2853 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the memory operand of the original node over to the new node.
2854 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2855 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2856 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::StoreParam* machine node for NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32, or StoreParamS32. Operands of
/// the new node are the stored value(s), the parameter index and offset as
/// i32 target constants, the chain, and Flag. The U32/S32 variants first
/// insert a 16-to-32-bit CVT node and store its result with StoreParamI32.
/// The node produces MVT::Other and MVT::Glue and inherits the original
/// node's memory operand.
2861 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Incoming operands: 0 = chain, 1 = constant parameter index, 2 = constant
// offset, 3.. = stored values, last = Flag.
2863 SDValue Chain = N->getOperand(0);
2864 SDValue Param = N->getOperand(1);
2865 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2866 SDValue Offset = N->getOperand(2);
2867 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2868 MemSDNode *Mem = cast<MemSDNode>(N);
2869 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2871 // How many elements do we have?
2872 unsigned NumElts = 1;
2873 switch (N->getOpcode()) {
2876 case NVPTXISD::StoreParamU32:
2877 case NVPTXISD::StoreParamS32:
2878 case NVPTXISD::StoreParam:
2881 case NVPTXISD::StoreParamV2:
2884 case NVPTXISD::StoreParamV4:
2889 // Build vector of operands
2890 SmallVector<SDValue, 8> Ops;
2891 for (unsigned i = 0; i < NumElts; ++i)
2892 Ops.push_back(N->getOperand(i + 3));
2893 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2894 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2895 Ops.push_back(Chain);
2896 Ops.push_back(Flag);
2898 // Determine target opcode
2899 // If we have an i1, use an 8-bit store. The lowering code in
2900 // NVPTXISelLowering will have already emitted an upcast.
2901 unsigned Opcode = 0;
2902 switch (N->getOpcode()) {
// Scalar opcode table, keyed on the memory VT.
2908 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2912 Opcode = NVPTX::StoreParamI8;
2915 Opcode = NVPTX::StoreParamI8;
2918 Opcode = NVPTX::StoreParamI16;
2921 Opcode = NVPTX::StoreParamI32;
2924 Opcode = NVPTX::StoreParamI64;
2927 Opcode = NVPTX::StoreParamF32;
2930 Opcode = NVPTX::StoreParamF64;
// Two-element opcode table.
2935 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2939 Opcode = NVPTX::StoreParamV2I8;
2942 Opcode = NVPTX::StoreParamV2I8;
2945 Opcode = NVPTX::StoreParamV2I16;
2948 Opcode = NVPTX::StoreParamV2I32;
2951 Opcode = NVPTX::StoreParamV2I64;
2954 Opcode = NVPTX::StoreParamV2F32;
2957 Opcode = NVPTX::StoreParamV2F64;
// Four-element opcode table.
2962 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2966 Opcode = NVPTX::StoreParamV4I8;
2969 Opcode = NVPTX::StoreParamV4I8;
2972 Opcode = NVPTX::StoreParamV4I16;
2975 Opcode = NVPTX::StoreParamV4I32;
2978 Opcode = NVPTX::StoreParamV4F32;
2984 // Special case: if we have a sign-extend/zero-extend node, insert the
2985 // conversion instruction first, and use that as the value operand to
2986 // the selected StoreParam node.
// Zero-extending variant: CVT_u32_u16 on the value, result replaces Ops[0].
2987 case NVPTXISD::StoreParamU32: {
2988 Opcode = NVPTX::StoreParamI32;
2989 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2991 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2992 MVT::i32, Ops[0], CvtNone);
2993 Ops[0] = SDValue(Cvt, 0);
// Sign-extending variant: CVT_s32_s16 on the value, result replaces Ops[0].
2996 case NVPTXISD::StoreParamS32: {
2997 Opcode = NVPTX::StoreParamI32;
2998 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
3000 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3001 MVT::i32, Ops[0], CvtNone);
3002 Ops[0] = SDValue(Cvt, 0);
// The selected node yields a chain and a glue result.
3007 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3009 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the memory operand of the original node over to the new node.
3010 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3011 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3012 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3017 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3018 SDValue Chain = N->getOperand(0);
3019 SDNode *Ret = nullptr;
3021 SmallVector<SDValue, 8> Ops;
3023 switch (N->getOpcode()) {
3024 default: return nullptr;
3025 case NVPTXISD::Tex1DFloatS32:
3026 Opc = NVPTX::TEX_1D_F32_S32;
3028 case NVPTXISD::Tex1DFloatFloat:
3029 Opc = NVPTX::TEX_1D_F32_F32;
3031 case NVPTXISD::Tex1DFloatFloatLevel:
3032 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3034 case NVPTXISD::Tex1DFloatFloatGrad:
3035 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3037 case NVPTXISD::Tex1DS32S32:
3038 Opc = NVPTX::TEX_1D_S32_S32;
3040 case NVPTXISD::Tex1DS32Float:
3041 Opc = NVPTX::TEX_1D_S32_F32;
3043 case NVPTXISD::Tex1DS32FloatLevel:
3044 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3046 case NVPTXISD::Tex1DS32FloatGrad:
3047 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3049 case NVPTXISD::Tex1DU32S32:
3050 Opc = NVPTX::TEX_1D_U32_S32;
3052 case NVPTXISD::Tex1DU32Float:
3053 Opc = NVPTX::TEX_1D_U32_F32;
3055 case NVPTXISD::Tex1DU32FloatLevel:
3056 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3058 case NVPTXISD::Tex1DU32FloatGrad:
3059 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3061 case NVPTXISD::Tex1DArrayFloatS32:
3062 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3064 case NVPTXISD::Tex1DArrayFloatFloat:
3065 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3067 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3068 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3070 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3071 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3073 case NVPTXISD::Tex1DArrayS32S32:
3074 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3076 case NVPTXISD::Tex1DArrayS32Float:
3077 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3079 case NVPTXISD::Tex1DArrayS32FloatLevel:
3080 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3082 case NVPTXISD::Tex1DArrayS32FloatGrad:
3083 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3085 case NVPTXISD::Tex1DArrayU32S32:
3086 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3088 case NVPTXISD::Tex1DArrayU32Float:
3089 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3091 case NVPTXISD::Tex1DArrayU32FloatLevel:
3092 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3094 case NVPTXISD::Tex1DArrayU32FloatGrad:
3095 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3097 case NVPTXISD::Tex2DFloatS32:
3098 Opc = NVPTX::TEX_2D_F32_S32;
3100 case NVPTXISD::Tex2DFloatFloat:
3101 Opc = NVPTX::TEX_2D_F32_F32;
3103 case NVPTXISD::Tex2DFloatFloatLevel:
3104 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3106 case NVPTXISD::Tex2DFloatFloatGrad:
3107 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3109 case NVPTXISD::Tex2DS32S32:
3110 Opc = NVPTX::TEX_2D_S32_S32;
3112 case NVPTXISD::Tex2DS32Float:
3113 Opc = NVPTX::TEX_2D_S32_F32;
3115 case NVPTXISD::Tex2DS32FloatLevel:
3116 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3118 case NVPTXISD::Tex2DS32FloatGrad:
3119 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3121 case NVPTXISD::Tex2DU32S32:
3122 Opc = NVPTX::TEX_2D_U32_S32;
3124 case NVPTXISD::Tex2DU32Float:
3125 Opc = NVPTX::TEX_2D_U32_F32;
3127 case NVPTXISD::Tex2DU32FloatLevel:
3128 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3130 case NVPTXISD::Tex2DU32FloatGrad:
3131 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3133 case NVPTXISD::Tex2DArrayFloatS32:
3134 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3136 case NVPTXISD::Tex2DArrayFloatFloat:
3137 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3139 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3140 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3142 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3143 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3145 case NVPTXISD::Tex2DArrayS32S32:
3146 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3148 case NVPTXISD::Tex2DArrayS32Float:
3149 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3151 case NVPTXISD::Tex2DArrayS32FloatLevel:
3152 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3154 case NVPTXISD::Tex2DArrayS32FloatGrad:
3155 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3157 case NVPTXISD::Tex2DArrayU32S32:
3158 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3160 case NVPTXISD::Tex2DArrayU32Float:
3161 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3163 case NVPTXISD::Tex2DArrayU32FloatLevel:
3164 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3166 case NVPTXISD::Tex2DArrayU32FloatGrad:
3167 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3169 case NVPTXISD::Tex3DFloatS32:
3170 Opc = NVPTX::TEX_3D_F32_S32;
3172 case NVPTXISD::Tex3DFloatFloat:
3173 Opc = NVPTX::TEX_3D_F32_F32;
3175 case NVPTXISD::Tex3DFloatFloatLevel:
3176 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3178 case NVPTXISD::Tex3DFloatFloatGrad:
3179 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3181 case NVPTXISD::Tex3DS32S32:
3182 Opc = NVPTX::TEX_3D_S32_S32;
3184 case NVPTXISD::Tex3DS32Float:
3185 Opc = NVPTX::TEX_3D_S32_F32;
3187 case NVPTXISD::Tex3DS32FloatLevel:
3188 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3190 case NVPTXISD::Tex3DS32FloatGrad:
3191 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3193 case NVPTXISD::Tex3DU32S32:
3194 Opc = NVPTX::TEX_3D_U32_S32;
3196 case NVPTXISD::Tex3DU32Float:
3197 Opc = NVPTX::TEX_3D_U32_F32;
3199 case NVPTXISD::Tex3DU32FloatLevel:
3200 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3202 case NVPTXISD::Tex3DU32FloatGrad:
3203 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3205 case NVPTXISD::TexCubeFloatFloat:
3206 Opc = NVPTX::TEX_CUBE_F32_F32;
3208 case NVPTXISD::TexCubeFloatFloatLevel:
3209 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3211 case NVPTXISD::TexCubeS32Float:
3212 Opc = NVPTX::TEX_CUBE_S32_F32;
3214 case NVPTXISD::TexCubeS32FloatLevel:
3215 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3217 case NVPTXISD::TexCubeU32Float:
3218 Opc = NVPTX::TEX_CUBE_U32_F32;
3220 case NVPTXISD::TexCubeU32FloatLevel:
3221 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3223 case NVPTXISD::TexCubeArrayFloatFloat:
3224 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3226 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3227 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3229 case NVPTXISD::TexCubeArrayS32Float:
3230 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3232 case NVPTXISD::TexCubeArrayS32FloatLevel:
3233 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3235 case NVPTXISD::TexCubeArrayU32Float:
3236 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3238 case NVPTXISD::TexCubeArrayU32FloatLevel:
3239 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3241 case NVPTXISD::Tld4R2DFloatFloat:
3242 Opc = NVPTX::TLD4_R_2D_F32_F32;
3244 case NVPTXISD::Tld4G2DFloatFloat:
3245 Opc = NVPTX::TLD4_G_2D_F32_F32;
3247 case NVPTXISD::Tld4B2DFloatFloat:
3248 Opc = NVPTX::TLD4_B_2D_F32_F32;
3250 case NVPTXISD::Tld4A2DFloatFloat:
3251 Opc = NVPTX::TLD4_A_2D_F32_F32;
3253 case NVPTXISD::Tld4R2DS64Float:
3254 Opc = NVPTX::TLD4_R_2D_S32_F32;
3256 case NVPTXISD::Tld4G2DS64Float:
3257 Opc = NVPTX::TLD4_G_2D_S32_F32;
3259 case NVPTXISD::Tld4B2DS64Float:
3260 Opc = NVPTX::TLD4_B_2D_S32_F32;
3262 case NVPTXISD::Tld4A2DS64Float:
3263 Opc = NVPTX::TLD4_A_2D_S32_F32;
3265 case NVPTXISD::Tld4R2DU64Float:
3266 Opc = NVPTX::TLD4_R_2D_U32_F32;
3268 case NVPTXISD::Tld4G2DU64Float:
3269 Opc = NVPTX::TLD4_G_2D_U32_F32;
3271 case NVPTXISD::Tld4B2DU64Float:
3272 Opc = NVPTX::TLD4_B_2D_U32_F32;
3274 case NVPTXISD::Tld4A2DU64Float:
3275 Opc = NVPTX::TLD4_A_2D_U32_F32;
3277 case NVPTXISD::TexUnified1DFloatS32:
3278 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3280 case NVPTXISD::TexUnified1DFloatFloat:
3281 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3283 case NVPTXISD::TexUnified1DFloatFloatLevel:
3284 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3286 case NVPTXISD::TexUnified1DFloatFloatGrad:
3287 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3289 case NVPTXISD::TexUnified1DS32S32:
3290 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3292 case NVPTXISD::TexUnified1DS32Float:
3293 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3295 case NVPTXISD::TexUnified1DS32FloatLevel:
3296 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3298 case NVPTXISD::TexUnified1DS32FloatGrad:
3299 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3301 case NVPTXISD::TexUnified1DU32S32:
3302 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3304 case NVPTXISD::TexUnified1DU32Float:
3305 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3307 case NVPTXISD::TexUnified1DU32FloatLevel:
3308 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3310 case NVPTXISD::TexUnified1DU32FloatGrad:
3311 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3313 case NVPTXISD::TexUnified1DArrayFloatS32:
3314 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3316 case NVPTXISD::TexUnified1DArrayFloatFloat:
3317 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3319 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3320 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3322 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3323 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3325 case NVPTXISD::TexUnified1DArrayS32S32:
3326 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3328 case NVPTXISD::TexUnified1DArrayS32Float:
3329 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3331 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3332 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3334 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3335 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3337 case NVPTXISD::TexUnified1DArrayU32S32:
3338 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3340 case NVPTXISD::TexUnified1DArrayU32Float:
3341 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3343 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3344 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3346 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3347 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3349 case NVPTXISD::TexUnified2DFloatS32:
3350 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3352 case NVPTXISD::TexUnified2DFloatFloat:
3353 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3355 case NVPTXISD::TexUnified2DFloatFloatLevel:
3356 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3358 case NVPTXISD::TexUnified2DFloatFloatGrad:
3359 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3361 case NVPTXISD::TexUnified2DS32S32:
3362 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3364 case NVPTXISD::TexUnified2DS32Float:
3365 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3367 case NVPTXISD::TexUnified2DS32FloatLevel:
3368 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3370 case NVPTXISD::TexUnified2DS32FloatGrad:
3371 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3373 case NVPTXISD::TexUnified2DU32S32:
3374 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3376 case NVPTXISD::TexUnified2DU32Float:
3377 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3379 case NVPTXISD::TexUnified2DU32FloatLevel:
3380 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3382 case NVPTXISD::TexUnified2DU32FloatGrad:
3383 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3385 case NVPTXISD::TexUnified2DArrayFloatS32:
3386 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3388 case NVPTXISD::TexUnified2DArrayFloatFloat:
3389 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3391 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3392 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3394 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3395 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3397 case NVPTXISD::TexUnified2DArrayS32S32:
3398 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3400 case NVPTXISD::TexUnified2DArrayS32Float:
3401 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3403 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3404 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3406 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3407 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3409 case NVPTXISD::TexUnified2DArrayU32S32:
3410 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3412 case NVPTXISD::TexUnified2DArrayU32Float:
3413 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3415 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3416 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3418 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3419 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3421 case NVPTXISD::TexUnified3DFloatS32:
3422 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3424 case NVPTXISD::TexUnified3DFloatFloat:
3425 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3427 case NVPTXISD::TexUnified3DFloatFloatLevel:
3428 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3430 case NVPTXISD::TexUnified3DFloatFloatGrad:
3431 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3433 case NVPTXISD::TexUnified3DS32S32:
3434 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3436 case NVPTXISD::TexUnified3DS32Float:
3437 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3439 case NVPTXISD::TexUnified3DS32FloatLevel:
3440 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3442 case NVPTXISD::TexUnified3DS32FloatGrad:
3443 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3445 case NVPTXISD::TexUnified3DU32S32:
3446 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3448 case NVPTXISD::TexUnified3DU32Float:
3449 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3451 case NVPTXISD::TexUnified3DU32FloatLevel:
3452 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3454 case NVPTXISD::TexUnified3DU32FloatGrad:
3455 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3457 case NVPTXISD::TexUnifiedCubeFloatFloat:
3458 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3460 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3461 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3463 case NVPTXISD::TexUnifiedCubeS32Float:
3464 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3466 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3467 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3469 case NVPTXISD::TexUnifiedCubeU32Float:
3470 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3472 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3473 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3475 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3476 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3478 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3479 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3481 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3482 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3484 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3485 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3487 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3488 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3490 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3491 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3493 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3494 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3496 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3497 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3499 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3500 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3502 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3503 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3505 case NVPTXISD::Tld4UnifiedR2DS64Float:
3506 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3508 case NVPTXISD::Tld4UnifiedG2DS64Float:
3509 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3511 case NVPTXISD::Tld4UnifiedB2DS64Float:
3512 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3514 case NVPTXISD::Tld4UnifiedA2DS64Float:
3515 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3517 case NVPTXISD::Tld4UnifiedR2DU64Float:
3518 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3520 case NVPTXISD::Tld4UnifiedG2DU64Float:
3521 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3523 case NVPTXISD::Tld4UnifiedB2DU64Float:
3524 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3526 case NVPTXISD::Tld4UnifiedA2DU64Float:
3527 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3531 // Copy over operands
3532 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3533 Ops.push_back(N->getOperand(i));
3536 Ops.push_back(Chain);
3537 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3541 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3542 SDValue Chain = N->getOperand(0);
3543 SDValue TexHandle = N->getOperand(1);
3544 SDNode *Ret = nullptr;
3546 SmallVector<SDValue, 8> Ops;
3547 switch (N->getOpcode()) {
3548 default: return nullptr;
3549 case NVPTXISD::Suld1DI8Clamp:
3550 Opc = NVPTX::SULD_1D_I8_CLAMP;
3551 Ops.push_back(TexHandle);
3552 Ops.push_back(N->getOperand(2));
3553 Ops.push_back(Chain);
3555 case NVPTXISD::Suld1DI16Clamp:
3556 Opc = NVPTX::SULD_1D_I16_CLAMP;
3557 Ops.push_back(TexHandle);
3558 Ops.push_back(N->getOperand(2));
3559 Ops.push_back(Chain);
3561 case NVPTXISD::Suld1DI32Clamp:
3562 Opc = NVPTX::SULD_1D_I32_CLAMP;
3563 Ops.push_back(TexHandle);
3564 Ops.push_back(N->getOperand(2));
3565 Ops.push_back(Chain);
3567 case NVPTXISD::Suld1DI64Clamp:
3568 Opc = NVPTX::SULD_1D_I64_CLAMP;
3569 Ops.push_back(TexHandle);
3570 Ops.push_back(N->getOperand(2));
3571 Ops.push_back(Chain);
3573 case NVPTXISD::Suld1DV2I8Clamp:
3574 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3575 Ops.push_back(TexHandle);
3576 Ops.push_back(N->getOperand(2));
3577 Ops.push_back(Chain);
3579 case NVPTXISD::Suld1DV2I16Clamp:
3580 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3581 Ops.push_back(TexHandle);
3582 Ops.push_back(N->getOperand(2));
3583 Ops.push_back(Chain);
3585 case NVPTXISD::Suld1DV2I32Clamp:
3586 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3587 Ops.push_back(TexHandle);
3588 Ops.push_back(N->getOperand(2));
3589 Ops.push_back(Chain);
3591 case NVPTXISD::Suld1DV2I64Clamp:
3592 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3593 Ops.push_back(TexHandle);
3594 Ops.push_back(N->getOperand(2));
3595 Ops.push_back(Chain);
3597 case NVPTXISD::Suld1DV4I8Clamp:
3598 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3599 Ops.push_back(TexHandle);
3600 Ops.push_back(N->getOperand(2));
3601 Ops.push_back(Chain);
3603 case NVPTXISD::Suld1DV4I16Clamp:
3604 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3605 Ops.push_back(TexHandle);
3606 Ops.push_back(N->getOperand(2));
3607 Ops.push_back(Chain);
3609 case NVPTXISD::Suld1DV4I32Clamp:
3610 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3611 Ops.push_back(TexHandle);
3612 Ops.push_back(N->getOperand(2));
3613 Ops.push_back(Chain);
3615 case NVPTXISD::Suld1DArrayI8Clamp:
3616 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3617 Ops.push_back(TexHandle);
3618 Ops.push_back(N->getOperand(2));
3619 Ops.push_back(N->getOperand(3));
3620 Ops.push_back(Chain);
3622 case NVPTXISD::Suld1DArrayI16Clamp:
3623 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3624 Ops.push_back(TexHandle);
3625 Ops.push_back(N->getOperand(2));
3626 Ops.push_back(N->getOperand(3));
3627 Ops.push_back(Chain);
3629 case NVPTXISD::Suld1DArrayI32Clamp:
3630 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3631 Ops.push_back(TexHandle);
3632 Ops.push_back(N->getOperand(2));
3633 Ops.push_back(N->getOperand(3));
3634 Ops.push_back(Chain);
3636 case NVPTXISD::Suld1DArrayI64Clamp:
3637 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3638 Ops.push_back(TexHandle);
3639 Ops.push_back(N->getOperand(2));
3640 Ops.push_back(N->getOperand(3));
3641 Ops.push_back(Chain);
3643 case NVPTXISD::Suld1DArrayV2I8Clamp:
3644 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3645 Ops.push_back(TexHandle);
3646 Ops.push_back(N->getOperand(2));
3647 Ops.push_back(N->getOperand(3));
3648 Ops.push_back(Chain);
3650 case NVPTXISD::Suld1DArrayV2I16Clamp:
3651 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3652 Ops.push_back(TexHandle);
3653 Ops.push_back(N->getOperand(2));
3654 Ops.push_back(N->getOperand(3));
3655 Ops.push_back(Chain);
3657 case NVPTXISD::Suld1DArrayV2I32Clamp:
3658 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3659 Ops.push_back(TexHandle);
3660 Ops.push_back(N->getOperand(2));
3661 Ops.push_back(N->getOperand(3));
3662 Ops.push_back(Chain);
3664 case NVPTXISD::Suld1DArrayV2I64Clamp:
3665 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3666 Ops.push_back(TexHandle);
3667 Ops.push_back(N->getOperand(2));
3668 Ops.push_back(N->getOperand(3));
3669 Ops.push_back(Chain);
3671 case NVPTXISD::Suld1DArrayV4I8Clamp:
3672 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3673 Ops.push_back(TexHandle);
3674 Ops.push_back(N->getOperand(2));
3675 Ops.push_back(N->getOperand(3));
3676 Ops.push_back(Chain);
3678 case NVPTXISD::Suld1DArrayV4I16Clamp:
3679 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3680 Ops.push_back(TexHandle);
3681 Ops.push_back(N->getOperand(2));
3682 Ops.push_back(N->getOperand(3));
3683 Ops.push_back(Chain);
3685 case NVPTXISD::Suld1DArrayV4I32Clamp:
3686 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3687 Ops.push_back(TexHandle);
3688 Ops.push_back(N->getOperand(2));
3689 Ops.push_back(N->getOperand(3));
3690 Ops.push_back(Chain);
3692 case NVPTXISD::Suld2DI8Clamp:
3693 Opc = NVPTX::SULD_2D_I8_CLAMP;
3694 Ops.push_back(TexHandle);
3695 Ops.push_back(N->getOperand(2));
3696 Ops.push_back(N->getOperand(3));
3697 Ops.push_back(Chain);
3699 case NVPTXISD::Suld2DI16Clamp:
3700 Opc = NVPTX::SULD_2D_I16_CLAMP;
3701 Ops.push_back(TexHandle);
3702 Ops.push_back(N->getOperand(2));
3703 Ops.push_back(N->getOperand(3));
3704 Ops.push_back(Chain);
3706 case NVPTXISD::Suld2DI32Clamp:
3707 Opc = NVPTX::SULD_2D_I32_CLAMP;
3708 Ops.push_back(TexHandle);
3709 Ops.push_back(N->getOperand(2));
3710 Ops.push_back(N->getOperand(3));
3711 Ops.push_back(Chain);
3713 case NVPTXISD::Suld2DI64Clamp:
3714 Opc = NVPTX::SULD_2D_I64_CLAMP;
3715 Ops.push_back(TexHandle);
3716 Ops.push_back(N->getOperand(2));
3717 Ops.push_back(N->getOperand(3));
3718 Ops.push_back(Chain);
3720 case NVPTXISD::Suld2DV2I8Clamp:
3721 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3722 Ops.push_back(TexHandle);
3723 Ops.push_back(N->getOperand(2));
3724 Ops.push_back(N->getOperand(3));
3725 Ops.push_back(Chain);
3727 case NVPTXISD::Suld2DV2I16Clamp:
3728 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3729 Ops.push_back(TexHandle);
3730 Ops.push_back(N->getOperand(2));
3731 Ops.push_back(N->getOperand(3));
3732 Ops.push_back(Chain);
3734 case NVPTXISD::Suld2DV2I32Clamp:
3735 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3736 Ops.push_back(TexHandle);
3737 Ops.push_back(N->getOperand(2));
3738 Ops.push_back(N->getOperand(3));
3739 Ops.push_back(Chain);
3741 case NVPTXISD::Suld2DV2I64Clamp:
3742 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3743 Ops.push_back(TexHandle);
3744 Ops.push_back(N->getOperand(2));
3745 Ops.push_back(N->getOperand(3));
3746 Ops.push_back(Chain);
3748 case NVPTXISD::Suld2DV4I8Clamp:
3749 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3750 Ops.push_back(TexHandle);
3751 Ops.push_back(N->getOperand(2));
3752 Ops.push_back(N->getOperand(3));
3753 Ops.push_back(Chain);
3755 case NVPTXISD::Suld2DV4I16Clamp:
3756 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3757 Ops.push_back(TexHandle);
3758 Ops.push_back(N->getOperand(2));
3759 Ops.push_back(N->getOperand(3));
3760 Ops.push_back(Chain);
3762 case NVPTXISD::Suld2DV4I32Clamp:
3763 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3764 Ops.push_back(TexHandle);
3765 Ops.push_back(N->getOperand(2));
3766 Ops.push_back(N->getOperand(3));
3767 Ops.push_back(Chain);
3769 case NVPTXISD::Suld2DArrayI8Clamp:
3770 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3771 Ops.push_back(TexHandle);
3772 Ops.push_back(N->getOperand(2));
3773 Ops.push_back(N->getOperand(3));
3774 Ops.push_back(N->getOperand(4));
3775 Ops.push_back(Chain);
3777 case NVPTXISD::Suld2DArrayI16Clamp:
3778 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3779 Ops.push_back(TexHandle);
3780 Ops.push_back(N->getOperand(2));
3781 Ops.push_back(N->getOperand(3));
3782 Ops.push_back(N->getOperand(4));
3783 Ops.push_back(Chain);
3785 case NVPTXISD::Suld2DArrayI32Clamp:
3786 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3787 Ops.push_back(TexHandle);
3788 Ops.push_back(N->getOperand(2));
3789 Ops.push_back(N->getOperand(3));
3790 Ops.push_back(N->getOperand(4));
3791 Ops.push_back(Chain);
3793 case NVPTXISD::Suld2DArrayI64Clamp:
3794 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(N->getOperand(4));
3799 Ops.push_back(Chain);
3801 case NVPTXISD::Suld2DArrayV2I8Clamp:
3802 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3803 Ops.push_back(TexHandle);
3804 Ops.push_back(N->getOperand(2));
3805 Ops.push_back(N->getOperand(3));
3806 Ops.push_back(N->getOperand(4));
3807 Ops.push_back(Chain);
3809 case NVPTXISD::Suld2DArrayV2I16Clamp:
3810 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3811 Ops.push_back(TexHandle);
3812 Ops.push_back(N->getOperand(2));
3813 Ops.push_back(N->getOperand(3));
3814 Ops.push_back(N->getOperand(4));
3815 Ops.push_back(Chain);
3817 case NVPTXISD::Suld2DArrayV2I32Clamp:
3818 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3819 Ops.push_back(TexHandle);
3820 Ops.push_back(N->getOperand(2));
3821 Ops.push_back(N->getOperand(3));
3822 Ops.push_back(N->getOperand(4));
3823 Ops.push_back(Chain);
3825 case NVPTXISD::Suld2DArrayV2I64Clamp:
3826 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3827 Ops.push_back(TexHandle);
3828 Ops.push_back(N->getOperand(2));
3829 Ops.push_back(N->getOperand(3));
3830 Ops.push_back(N->getOperand(4));
3831 Ops.push_back(Chain);
3833 case NVPTXISD::Suld2DArrayV4I8Clamp:
3834 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3835 Ops.push_back(TexHandle);
3836 Ops.push_back(N->getOperand(2));
3837 Ops.push_back(N->getOperand(3));
3838 Ops.push_back(N->getOperand(4));
3839 Ops.push_back(Chain);
3841 case NVPTXISD::Suld2DArrayV4I16Clamp:
3842 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3843 Ops.push_back(TexHandle);
3844 Ops.push_back(N->getOperand(2));
3845 Ops.push_back(N->getOperand(3));
3846 Ops.push_back(N->getOperand(4));
3847 Ops.push_back(Chain);
3849 case NVPTXISD::Suld2DArrayV4I32Clamp:
3850 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3851 Ops.push_back(TexHandle);
3852 Ops.push_back(N->getOperand(2));
3853 Ops.push_back(N->getOperand(3));
3854 Ops.push_back(N->getOperand(4));
3855 Ops.push_back(Chain);
3857 case NVPTXISD::Suld3DI8Clamp:
3858 Opc = NVPTX::SULD_3D_I8_CLAMP;
3859 Ops.push_back(TexHandle);
3860 Ops.push_back(N->getOperand(2));
3861 Ops.push_back(N->getOperand(3));
3862 Ops.push_back(N->getOperand(4));
3863 Ops.push_back(Chain);
3865 case NVPTXISD::Suld3DI16Clamp:
3866 Opc = NVPTX::SULD_3D_I16_CLAMP;
3867 Ops.push_back(TexHandle);
3868 Ops.push_back(N->getOperand(2));
3869 Ops.push_back(N->getOperand(3));
3870 Ops.push_back(N->getOperand(4));
3871 Ops.push_back(Chain);
3873 case NVPTXISD::Suld3DI32Clamp:
3874 Opc = NVPTX::SULD_3D_I32_CLAMP;
3875 Ops.push_back(TexHandle);
3876 Ops.push_back(N->getOperand(2));
3877 Ops.push_back(N->getOperand(3));
3878 Ops.push_back(N->getOperand(4));
3879 Ops.push_back(Chain);
3881 case NVPTXISD::Suld3DI64Clamp:
3882 Opc = NVPTX::SULD_3D_I64_CLAMP;
3883 Ops.push_back(TexHandle);
3884 Ops.push_back(N->getOperand(2));
3885 Ops.push_back(N->getOperand(3));
3886 Ops.push_back(N->getOperand(4));
3887 Ops.push_back(Chain);
3889 case NVPTXISD::Suld3DV2I8Clamp:
3890 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3891 Ops.push_back(TexHandle);
3892 Ops.push_back(N->getOperand(2));
3893 Ops.push_back(N->getOperand(3));
3894 Ops.push_back(N->getOperand(4));
3895 Ops.push_back(Chain);
3897 case NVPTXISD::Suld3DV2I16Clamp:
3898 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3899 Ops.push_back(TexHandle);
3900 Ops.push_back(N->getOperand(2));
3901 Ops.push_back(N->getOperand(3));
3902 Ops.push_back(N->getOperand(4));
3903 Ops.push_back(Chain);
3905 case NVPTXISD::Suld3DV2I32Clamp:
3906 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3907 Ops.push_back(TexHandle);
3908 Ops.push_back(N->getOperand(2));
3909 Ops.push_back(N->getOperand(3));
3910 Ops.push_back(N->getOperand(4));
3911 Ops.push_back(Chain);
3913 case NVPTXISD::Suld3DV2I64Clamp:
3914 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3915 Ops.push_back(TexHandle);
3916 Ops.push_back(N->getOperand(2));
3917 Ops.push_back(N->getOperand(3));
3918 Ops.push_back(N->getOperand(4));
3919 Ops.push_back(Chain);
3921 case NVPTXISD::Suld3DV4I8Clamp:
3922 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3923 Ops.push_back(TexHandle);
3924 Ops.push_back(N->getOperand(2));
3925 Ops.push_back(N->getOperand(3));
3926 Ops.push_back(N->getOperand(4));
3927 Ops.push_back(Chain);
3929 case NVPTXISD::Suld3DV4I16Clamp:
3930 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3931 Ops.push_back(TexHandle);
3932 Ops.push_back(N->getOperand(2));
3933 Ops.push_back(N->getOperand(3));
3934 Ops.push_back(N->getOperand(4));
3935 Ops.push_back(Chain);
3937 case NVPTXISD::Suld3DV4I32Clamp:
3938 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3939 Ops.push_back(TexHandle);
3940 Ops.push_back(N->getOperand(2));
3941 Ops.push_back(N->getOperand(3));
3942 Ops.push_back(N->getOperand(4));
3943 Ops.push_back(Chain);
3945 case NVPTXISD::Suld1DI8Trap:
3946 Opc = NVPTX::SULD_1D_I8_TRAP;
3947 Ops.push_back(TexHandle);
3948 Ops.push_back(N->getOperand(2));
3949 Ops.push_back(Chain);
3951 case NVPTXISD::Suld1DI16Trap:
3952 Opc = NVPTX::SULD_1D_I16_TRAP;
3953 Ops.push_back(TexHandle);
3954 Ops.push_back(N->getOperand(2));
3955 Ops.push_back(Chain);
3957 case NVPTXISD::Suld1DI32Trap:
3958 Opc = NVPTX::SULD_1D_I32_TRAP;
3959 Ops.push_back(TexHandle);
3960 Ops.push_back(N->getOperand(2));
3961 Ops.push_back(Chain);
3963 case NVPTXISD::Suld1DI64Trap:
3964 Opc = NVPTX::SULD_1D_I64_TRAP;
3965 Ops.push_back(TexHandle);
3966 Ops.push_back(N->getOperand(2));
3967 Ops.push_back(Chain);
3969 case NVPTXISD::Suld1DV2I8Trap:
3970 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3971 Ops.push_back(TexHandle);
3972 Ops.push_back(N->getOperand(2));
3973 Ops.push_back(Chain);
3975 case NVPTXISD::Suld1DV2I16Trap:
3976 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3977 Ops.push_back(TexHandle);
3978 Ops.push_back(N->getOperand(2));
3979 Ops.push_back(Chain);
3981 case NVPTXISD::Suld1DV2I32Trap:
3982 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3983 Ops.push_back(TexHandle);
3984 Ops.push_back(N->getOperand(2));
3985 Ops.push_back(Chain);
3987 case NVPTXISD::Suld1DV2I64Trap:
3988 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3989 Ops.push_back(TexHandle);
3990 Ops.push_back(N->getOperand(2));
3991 Ops.push_back(Chain);
3993 case NVPTXISD::Suld1DV4I8Trap:
3994 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3995 Ops.push_back(TexHandle);
3996 Ops.push_back(N->getOperand(2));
3997 Ops.push_back(Chain);
3999 case NVPTXISD::Suld1DV4I16Trap:
4000 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4001 Ops.push_back(TexHandle);
4002 Ops.push_back(N->getOperand(2));
4003 Ops.push_back(Chain);
4005 case NVPTXISD::Suld1DV4I32Trap:
4006 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4007 Ops.push_back(TexHandle);
4008 Ops.push_back(N->getOperand(2));
4009 Ops.push_back(Chain);
4011 case NVPTXISD::Suld1DArrayI8Trap:
4012 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4013 Ops.push_back(TexHandle);
4014 Ops.push_back(N->getOperand(2));
4015 Ops.push_back(N->getOperand(3));
4016 Ops.push_back(Chain);
4018 case NVPTXISD::Suld1DArrayI16Trap:
4019 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4020 Ops.push_back(TexHandle);
4021 Ops.push_back(N->getOperand(2));
4022 Ops.push_back(N->getOperand(3));
4023 Ops.push_back(Chain);
4025 case NVPTXISD::Suld1DArrayI32Trap:
4026 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4027 Ops.push_back(TexHandle);
4028 Ops.push_back(N->getOperand(2));
4029 Ops.push_back(N->getOperand(3));
4030 Ops.push_back(Chain);
4032 case NVPTXISD::Suld1DArrayI64Trap:
4033 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4034 Ops.push_back(TexHandle);
4035 Ops.push_back(N->getOperand(2));
4036 Ops.push_back(N->getOperand(3));
4037 Ops.push_back(Chain);
4039 case NVPTXISD::Suld1DArrayV2I8Trap:
4040 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4041 Ops.push_back(TexHandle);
4042 Ops.push_back(N->getOperand(2));
4043 Ops.push_back(N->getOperand(3));
4044 Ops.push_back(Chain);
4046 case NVPTXISD::Suld1DArrayV2I16Trap:
4047 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4048 Ops.push_back(TexHandle);
4049 Ops.push_back(N->getOperand(2));
4050 Ops.push_back(N->getOperand(3));
4051 Ops.push_back(Chain);
4053 case NVPTXISD::Suld1DArrayV2I32Trap:
4054 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4055 Ops.push_back(TexHandle);
4056 Ops.push_back(N->getOperand(2));
4057 Ops.push_back(N->getOperand(3));
4058 Ops.push_back(Chain);
4060 case NVPTXISD::Suld1DArrayV2I64Trap:
4061 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4062 Ops.push_back(TexHandle);
4063 Ops.push_back(N->getOperand(2));
4064 Ops.push_back(N->getOperand(3));
4065 Ops.push_back(Chain);
4067 case NVPTXISD::Suld1DArrayV4I8Trap:
4068 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4069 Ops.push_back(TexHandle);
4070 Ops.push_back(N->getOperand(2));
4071 Ops.push_back(N->getOperand(3));
4072 Ops.push_back(Chain);
4074 case NVPTXISD::Suld1DArrayV4I16Trap:
4075 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4076 Ops.push_back(TexHandle);
4077 Ops.push_back(N->getOperand(2));
4078 Ops.push_back(N->getOperand(3));
4079 Ops.push_back(Chain);
4081 case NVPTXISD::Suld1DArrayV4I32Trap:
4082 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4083 Ops.push_back(TexHandle);
4084 Ops.push_back(N->getOperand(2));
4085 Ops.push_back(N->getOperand(3));
4086 Ops.push_back(Chain);
4088 case NVPTXISD::Suld2DI8Trap:
4089 Opc = NVPTX::SULD_2D_I8_TRAP;
4090 Ops.push_back(TexHandle);
4091 Ops.push_back(N->getOperand(2));
4092 Ops.push_back(N->getOperand(3));
4093 Ops.push_back(Chain);
4095 case NVPTXISD::Suld2DI16Trap:
4096 Opc = NVPTX::SULD_2D_I16_TRAP;
4097 Ops.push_back(TexHandle);
4098 Ops.push_back(N->getOperand(2));
4099 Ops.push_back(N->getOperand(3));
4100 Ops.push_back(Chain);
4102 case NVPTXISD::Suld2DI32Trap:
4103 Opc = NVPTX::SULD_2D_I32_TRAP;
4104 Ops.push_back(TexHandle);
4105 Ops.push_back(N->getOperand(2));
4106 Ops.push_back(N->getOperand(3));
4107 Ops.push_back(Chain);
4109 case NVPTXISD::Suld2DI64Trap:
4110 Opc = NVPTX::SULD_2D_I64_TRAP;
4111 Ops.push_back(TexHandle);
4112 Ops.push_back(N->getOperand(2));
4113 Ops.push_back(N->getOperand(3));
4114 Ops.push_back(Chain);
4116 case NVPTXISD::Suld2DV2I8Trap:
4117 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4118 Ops.push_back(TexHandle);
4119 Ops.push_back(N->getOperand(2));
4120 Ops.push_back(N->getOperand(3));
4121 Ops.push_back(Chain);
4123 case NVPTXISD::Suld2DV2I16Trap:
4124 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4125 Ops.push_back(TexHandle);
4126 Ops.push_back(N->getOperand(2));
4127 Ops.push_back(N->getOperand(3));
4128 Ops.push_back(Chain);
4130 case NVPTXISD::Suld2DV2I32Trap:
4131 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4132 Ops.push_back(TexHandle);
4133 Ops.push_back(N->getOperand(2));
4134 Ops.push_back(N->getOperand(3));
4135 Ops.push_back(Chain);
4137 case NVPTXISD::Suld2DV2I64Trap:
4138 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4139 Ops.push_back(TexHandle);
4140 Ops.push_back(N->getOperand(2));
4141 Ops.push_back(N->getOperand(3));
4142 Ops.push_back(Chain);
4144 case NVPTXISD::Suld2DV4I8Trap:
4145 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4146 Ops.push_back(TexHandle);
4147 Ops.push_back(N->getOperand(2));
4148 Ops.push_back(N->getOperand(3));
4149 Ops.push_back(Chain);
4151 case NVPTXISD::Suld2DV4I16Trap:
4152 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4153 Ops.push_back(TexHandle);
4154 Ops.push_back(N->getOperand(2));
4155 Ops.push_back(N->getOperand(3));
4156 Ops.push_back(Chain);
4158 case NVPTXISD::Suld2DV4I32Trap:
4159 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4160 Ops.push_back(TexHandle);
4161 Ops.push_back(N->getOperand(2));
4162 Ops.push_back(N->getOperand(3));
4163 Ops.push_back(Chain);
4165 case NVPTXISD::Suld2DArrayI8Trap:
4166 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4167 Ops.push_back(TexHandle);
4168 Ops.push_back(N->getOperand(2));
4169 Ops.push_back(N->getOperand(3));
4170 Ops.push_back(N->getOperand(4));
4171 Ops.push_back(Chain);
4173 case NVPTXISD::Suld2DArrayI16Trap:
4174 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4175 Ops.push_back(TexHandle);
4176 Ops.push_back(N->getOperand(2));
4177 Ops.push_back(N->getOperand(3));
4178 Ops.push_back(N->getOperand(4));
4179 Ops.push_back(Chain);
4181 case NVPTXISD::Suld2DArrayI32Trap:
4182 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4183 Ops.push_back(TexHandle);
4184 Ops.push_back(N->getOperand(2));
4185 Ops.push_back(N->getOperand(3));
4186 Ops.push_back(N->getOperand(4));
4187 Ops.push_back(Chain);
4189 case NVPTXISD::Suld2DArrayI64Trap:
4190 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(N->getOperand(4));
4195 Ops.push_back(Chain);
4197 case NVPTXISD::Suld2DArrayV2I8Trap:
4198 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4199 Ops.push_back(TexHandle);
4200 Ops.push_back(N->getOperand(2));
4201 Ops.push_back(N->getOperand(3));
4202 Ops.push_back(N->getOperand(4));
4203 Ops.push_back(Chain);
4205 case NVPTXISD::Suld2DArrayV2I16Trap:
4206 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4207 Ops.push_back(TexHandle);
4208 Ops.push_back(N->getOperand(2));
4209 Ops.push_back(N->getOperand(3));
4210 Ops.push_back(N->getOperand(4));
4211 Ops.push_back(Chain);
4213 case NVPTXISD::Suld2DArrayV2I32Trap:
4214 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4215 Ops.push_back(TexHandle);
4216 Ops.push_back(N->getOperand(2));
4217 Ops.push_back(N->getOperand(3));
4218 Ops.push_back(N->getOperand(4));
4219 Ops.push_back(Chain);
4221 case NVPTXISD::Suld2DArrayV2I64Trap:
4222 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4223 Ops.push_back(TexHandle);
4224 Ops.push_back(N->getOperand(2));
4225 Ops.push_back(N->getOperand(3));
4226 Ops.push_back(N->getOperand(4));
4227 Ops.push_back(Chain);
4229 case NVPTXISD::Suld2DArrayV4I8Trap:
4230 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4231 Ops.push_back(TexHandle);
4232 Ops.push_back(N->getOperand(2));
4233 Ops.push_back(N->getOperand(3));
4234 Ops.push_back(N->getOperand(4));
4235 Ops.push_back(Chain);
4237 case NVPTXISD::Suld2DArrayV4I16Trap:
4238 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4239 Ops.push_back(TexHandle);
4240 Ops.push_back(N->getOperand(2));
4241 Ops.push_back(N->getOperand(3));
4242 Ops.push_back(N->getOperand(4));
4243 Ops.push_back(Chain);
4245 case NVPTXISD::Suld2DArrayV4I32Trap:
4246 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4247 Ops.push_back(TexHandle);
4248 Ops.push_back(N->getOperand(2));
4249 Ops.push_back(N->getOperand(3));
4250 Ops.push_back(N->getOperand(4));
4251 Ops.push_back(Chain);
4253 case NVPTXISD::Suld3DI8Trap:
4254 Opc = NVPTX::SULD_3D_I8_TRAP;
4255 Ops.push_back(TexHandle);
4256 Ops.push_back(N->getOperand(2));
4257 Ops.push_back(N->getOperand(3));
4258 Ops.push_back(N->getOperand(4));
4259 Ops.push_back(Chain);
4261 case NVPTXISD::Suld3DI16Trap:
4262 Opc = NVPTX::SULD_3D_I16_TRAP;
4263 Ops.push_back(TexHandle);
4264 Ops.push_back(N->getOperand(2));
4265 Ops.push_back(N->getOperand(3));
4266 Ops.push_back(N->getOperand(4));
4267 Ops.push_back(Chain);
4269 case NVPTXISD::Suld3DI32Trap:
4270 Opc = NVPTX::SULD_3D_I32_TRAP;
4271 Ops.push_back(TexHandle);
4272 Ops.push_back(N->getOperand(2));
4273 Ops.push_back(N->getOperand(3));
4274 Ops.push_back(N->getOperand(4));
4275 Ops.push_back(Chain);
4277 case NVPTXISD::Suld3DI64Trap:
4278 Opc = NVPTX::SULD_3D_I64_TRAP;
4279 Ops.push_back(TexHandle);
4280 Ops.push_back(N->getOperand(2));
4281 Ops.push_back(N->getOperand(3));
4282 Ops.push_back(N->getOperand(4));
4283 Ops.push_back(Chain);
4285 case NVPTXISD::Suld3DV2I8Trap:
4286 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4287 Ops.push_back(TexHandle);
4288 Ops.push_back(N->getOperand(2));
4289 Ops.push_back(N->getOperand(3));
4290 Ops.push_back(N->getOperand(4));
4291 Ops.push_back(Chain);
4293 case NVPTXISD::Suld3DV2I16Trap:
4294 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4295 Ops.push_back(TexHandle);
4296 Ops.push_back(N->getOperand(2));
4297 Ops.push_back(N->getOperand(3));
4298 Ops.push_back(N->getOperand(4));
4299 Ops.push_back(Chain);
4301 case NVPTXISD::Suld3DV2I32Trap:
4302 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4303 Ops.push_back(TexHandle);
4304 Ops.push_back(N->getOperand(2));
4305 Ops.push_back(N->getOperand(3));
4306 Ops.push_back(N->getOperand(4));
4307 Ops.push_back(Chain);
4309 case NVPTXISD::Suld3DV2I64Trap:
4310 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4311 Ops.push_back(TexHandle);
4312 Ops.push_back(N->getOperand(2));
4313 Ops.push_back(N->getOperand(3));
4314 Ops.push_back(N->getOperand(4));
4315 Ops.push_back(Chain);
4317 case NVPTXISD::Suld3DV4I8Trap:
4318 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4319 Ops.push_back(TexHandle);
4320 Ops.push_back(N->getOperand(2));
4321 Ops.push_back(N->getOperand(3));
4322 Ops.push_back(N->getOperand(4));
4323 Ops.push_back(Chain);
4325 case NVPTXISD::Suld3DV4I16Trap:
4326 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4327 Ops.push_back(TexHandle);
4328 Ops.push_back(N->getOperand(2));
4329 Ops.push_back(N->getOperand(3));
4330 Ops.push_back(N->getOperand(4));
4331 Ops.push_back(Chain);
4333 case NVPTXISD::Suld3DV4I32Trap:
4334 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4335 Ops.push_back(TexHandle);
4336 Ops.push_back(N->getOperand(2));
4337 Ops.push_back(N->getOperand(3));
4338 Ops.push_back(N->getOperand(4));
4339 Ops.push_back(Chain);
4341 case NVPTXISD::Suld1DI8Zero:
4342 Opc = NVPTX::SULD_1D_I8_ZERO;
4343 Ops.push_back(TexHandle);
4344 Ops.push_back(N->getOperand(2));
4345 Ops.push_back(Chain);
4347 case NVPTXISD::Suld1DI16Zero:
4348 Opc = NVPTX::SULD_1D_I16_ZERO;
4349 Ops.push_back(TexHandle);
4350 Ops.push_back(N->getOperand(2));
4351 Ops.push_back(Chain);
4353 case NVPTXISD::Suld1DI32Zero:
4354 Opc = NVPTX::SULD_1D_I32_ZERO;
4355 Ops.push_back(TexHandle);
4356 Ops.push_back(N->getOperand(2));
4357 Ops.push_back(Chain);
4359 case NVPTXISD::Suld1DI64Zero:
4360 Opc = NVPTX::SULD_1D_I64_ZERO;
4361 Ops.push_back(TexHandle);
4362 Ops.push_back(N->getOperand(2));
4363 Ops.push_back(Chain);
4365 case NVPTXISD::Suld1DV2I8Zero:
4366 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4367 Ops.push_back(TexHandle);
4368 Ops.push_back(N->getOperand(2));
4369 Ops.push_back(Chain);
4371 case NVPTXISD::Suld1DV2I16Zero:
4372 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4373 Ops.push_back(TexHandle);
4374 Ops.push_back(N->getOperand(2));
4375 Ops.push_back(Chain);
4377 case NVPTXISD::Suld1DV2I32Zero:
4378 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4379 Ops.push_back(TexHandle);
4380 Ops.push_back(N->getOperand(2));
4381 Ops.push_back(Chain);
4383 case NVPTXISD::Suld1DV2I64Zero:
4384 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4385 Ops.push_back(TexHandle);
4386 Ops.push_back(N->getOperand(2));
4387 Ops.push_back(Chain);
4389 case NVPTXISD::Suld1DV4I8Zero:
4390 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4391 Ops.push_back(TexHandle);
4392 Ops.push_back(N->getOperand(2));
4393 Ops.push_back(Chain);
4395 case NVPTXISD::Suld1DV4I16Zero:
4396 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4397 Ops.push_back(TexHandle);
4398 Ops.push_back(N->getOperand(2));
4399 Ops.push_back(Chain);
4401 case NVPTXISD::Suld1DV4I32Zero:
4402 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4403 Ops.push_back(TexHandle);
4404 Ops.push_back(N->getOperand(2));
4405 Ops.push_back(Chain);
4407 case NVPTXISD::Suld1DArrayI8Zero:
4408 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4409 Ops.push_back(TexHandle);
4410 Ops.push_back(N->getOperand(2));
4411 Ops.push_back(N->getOperand(3));
4412 Ops.push_back(Chain);
4414 case NVPTXISD::Suld1DArrayI16Zero:
4415 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4416 Ops.push_back(TexHandle);
4417 Ops.push_back(N->getOperand(2));
4418 Ops.push_back(N->getOperand(3));
4419 Ops.push_back(Chain);
4421 case NVPTXISD::Suld1DArrayI32Zero:
4422 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4423 Ops.push_back(TexHandle);
4424 Ops.push_back(N->getOperand(2));
4425 Ops.push_back(N->getOperand(3));
4426 Ops.push_back(Chain);
4428 case NVPTXISD::Suld1DArrayI64Zero:
4429 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4430 Ops.push_back(TexHandle);
4431 Ops.push_back(N->getOperand(2));
4432 Ops.push_back(N->getOperand(3));
4433 Ops.push_back(Chain);
4435 case NVPTXISD::Suld1DArrayV2I8Zero:
4436 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4437 Ops.push_back(TexHandle);
4438 Ops.push_back(N->getOperand(2));
4439 Ops.push_back(N->getOperand(3));
4440 Ops.push_back(Chain);
4442 case NVPTXISD::Suld1DArrayV2I16Zero:
4443 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4444 Ops.push_back(TexHandle);
4445 Ops.push_back(N->getOperand(2));
4446 Ops.push_back(N->getOperand(3));
4447 Ops.push_back(Chain);
4449 case NVPTXISD::Suld1DArrayV2I32Zero:
4450 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4451 Ops.push_back(TexHandle);
4452 Ops.push_back(N->getOperand(2));
4453 Ops.push_back(N->getOperand(3));
4454 Ops.push_back(Chain);
4456 case NVPTXISD::Suld1DArrayV2I64Zero:
4457 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4458 Ops.push_back(TexHandle);
4459 Ops.push_back(N->getOperand(2));
4460 Ops.push_back(N->getOperand(3));
4461 Ops.push_back(Chain);
4463 case NVPTXISD::Suld1DArrayV4I8Zero:
4464 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4465 Ops.push_back(TexHandle);
4466 Ops.push_back(N->getOperand(2));
4467 Ops.push_back(N->getOperand(3));
4468 Ops.push_back(Chain);
4470 case NVPTXISD::Suld1DArrayV4I16Zero:
4471 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4472 Ops.push_back(TexHandle);
4473 Ops.push_back(N->getOperand(2));
4474 Ops.push_back(N->getOperand(3));
4475 Ops.push_back(Chain);
4477 case NVPTXISD::Suld1DArrayV4I32Zero:
4478 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4479 Ops.push_back(TexHandle);
4480 Ops.push_back(N->getOperand(2));
4481 Ops.push_back(N->getOperand(3));
4482 Ops.push_back(Chain);
4484 case NVPTXISD::Suld2DI8Zero:
4485 Opc = NVPTX::SULD_2D_I8_ZERO;
4486 Ops.push_back(TexHandle);
4487 Ops.push_back(N->getOperand(2));
4488 Ops.push_back(N->getOperand(3));
4489 Ops.push_back(Chain);
4491 case NVPTXISD::Suld2DI16Zero:
4492 Opc = NVPTX::SULD_2D_I16_ZERO;
4493 Ops.push_back(TexHandle);
4494 Ops.push_back(N->getOperand(2));
4495 Ops.push_back(N->getOperand(3));
4496 Ops.push_back(Chain);
4498 case NVPTXISD::Suld2DI32Zero:
4499 Opc = NVPTX::SULD_2D_I32_ZERO;
4500 Ops.push_back(TexHandle);
4501 Ops.push_back(N->getOperand(2));
4502 Ops.push_back(N->getOperand(3));
4503 Ops.push_back(Chain);
4505 case NVPTXISD::Suld2DI64Zero:
4506 Opc = NVPTX::SULD_2D_I64_ZERO;
4507 Ops.push_back(TexHandle);
4508 Ops.push_back(N->getOperand(2));
4509 Ops.push_back(N->getOperand(3));
4510 Ops.push_back(Chain);
4512 case NVPTXISD::Suld2DV2I8Zero:
4513 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4514 Ops.push_back(TexHandle);
4515 Ops.push_back(N->getOperand(2));
4516 Ops.push_back(N->getOperand(3));
4517 Ops.push_back(Chain);
4519 case NVPTXISD::Suld2DV2I16Zero:
4520 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4521 Ops.push_back(TexHandle);
4522 Ops.push_back(N->getOperand(2));
4523 Ops.push_back(N->getOperand(3));
4524 Ops.push_back(Chain);
4526 case NVPTXISD::Suld2DV2I32Zero:
4527 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4528 Ops.push_back(TexHandle);
4529 Ops.push_back(N->getOperand(2));
4530 Ops.push_back(N->getOperand(3));
4531 Ops.push_back(Chain);
4533 case NVPTXISD::Suld2DV2I64Zero:
4534 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4535 Ops.push_back(TexHandle);
4536 Ops.push_back(N->getOperand(2));
4537 Ops.push_back(N->getOperand(3));
4538 Ops.push_back(Chain);
4540 case NVPTXISD::Suld2DV4I8Zero:
4541 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4542 Ops.push_back(TexHandle);
4543 Ops.push_back(N->getOperand(2));
4544 Ops.push_back(N->getOperand(3));
4545 Ops.push_back(Chain);
4547 case NVPTXISD::Suld2DV4I16Zero:
4548 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4549 Ops.push_back(TexHandle);
4550 Ops.push_back(N->getOperand(2));
4551 Ops.push_back(N->getOperand(3));
4552 Ops.push_back(Chain);
4554 case NVPTXISD::Suld2DV4I32Zero:
4555 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4556 Ops.push_back(TexHandle);
4557 Ops.push_back(N->getOperand(2));
4558 Ops.push_back(N->getOperand(3));
4559 Ops.push_back(Chain);
4561 case NVPTXISD::Suld2DArrayI8Zero:
4562 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4563 Ops.push_back(TexHandle);
4564 Ops.push_back(N->getOperand(2));
4565 Ops.push_back(N->getOperand(3));
4566 Ops.push_back(N->getOperand(4));
4567 Ops.push_back(Chain);
4569 case NVPTXISD::Suld2DArrayI16Zero:
4570 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4571 Ops.push_back(TexHandle);
4572 Ops.push_back(N->getOperand(2));
4573 Ops.push_back(N->getOperand(3));
4574 Ops.push_back(N->getOperand(4));
4575 Ops.push_back(Chain);
4577 case NVPTXISD::Suld2DArrayI32Zero:
4578 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4579 Ops.push_back(TexHandle);
4580 Ops.push_back(N->getOperand(2));
4581 Ops.push_back(N->getOperand(3));
4582 Ops.push_back(N->getOperand(4));
4583 Ops.push_back(Chain);
4585 case NVPTXISD::Suld2DArrayI64Zero:
4586 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(N->getOperand(4));
4591 Ops.push_back(Chain);
4593 case NVPTXISD::Suld2DArrayV2I8Zero:
4594 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4595 Ops.push_back(TexHandle);
4596 Ops.push_back(N->getOperand(2));
4597 Ops.push_back(N->getOperand(3));
4598 Ops.push_back(N->getOperand(4));
4599 Ops.push_back(Chain);
4601 case NVPTXISD::Suld2DArrayV2I16Zero:
4602 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4603 Ops.push_back(TexHandle);
4604 Ops.push_back(N->getOperand(2));
4605 Ops.push_back(N->getOperand(3));
4606 Ops.push_back(N->getOperand(4));
4607 Ops.push_back(Chain);
4609 case NVPTXISD::Suld2DArrayV2I32Zero:
4610 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4611 Ops.push_back(TexHandle);
4612 Ops.push_back(N->getOperand(2));
4613 Ops.push_back(N->getOperand(3));
4614 Ops.push_back(N->getOperand(4));
4615 Ops.push_back(Chain);
4617 case NVPTXISD::Suld2DArrayV2I64Zero:
4618 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4619 Ops.push_back(TexHandle);
4620 Ops.push_back(N->getOperand(2));
4621 Ops.push_back(N->getOperand(3));
4622 Ops.push_back(N->getOperand(4));
4623 Ops.push_back(Chain);
4625 case NVPTXISD::Suld2DArrayV4I8Zero:
4626 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4627 Ops.push_back(TexHandle);
4628 Ops.push_back(N->getOperand(2));
4629 Ops.push_back(N->getOperand(3));
4630 Ops.push_back(N->getOperand(4));
4631 Ops.push_back(Chain);
4633 case NVPTXISD::Suld2DArrayV4I16Zero:
4634 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4635 Ops.push_back(TexHandle);
4636 Ops.push_back(N->getOperand(2));
4637 Ops.push_back(N->getOperand(3));
4638 Ops.push_back(N->getOperand(4));
4639 Ops.push_back(Chain);
4641 case NVPTXISD::Suld2DArrayV4I32Zero:
4642 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4643 Ops.push_back(TexHandle);
4644 Ops.push_back(N->getOperand(2));
4645 Ops.push_back(N->getOperand(3));
4646 Ops.push_back(N->getOperand(4));
4647 Ops.push_back(Chain);
4649 case NVPTXISD::Suld3DI8Zero:
4650 Opc = NVPTX::SULD_3D_I8_ZERO;
4651 Ops.push_back(TexHandle);
4652 Ops.push_back(N->getOperand(2));
4653 Ops.push_back(N->getOperand(3));
4654 Ops.push_back(N->getOperand(4));
4655 Ops.push_back(Chain);
4657 case NVPTXISD::Suld3DI16Zero:
4658 Opc = NVPTX::SULD_3D_I16_ZERO;
4659 Ops.push_back(TexHandle);
4660 Ops.push_back(N->getOperand(2));
4661 Ops.push_back(N->getOperand(3));
4662 Ops.push_back(N->getOperand(4));
4663 Ops.push_back(Chain);
4665 case NVPTXISD::Suld3DI32Zero:
4666 Opc = NVPTX::SULD_3D_I32_ZERO;
4667 Ops.push_back(TexHandle);
4668 Ops.push_back(N->getOperand(2));
4669 Ops.push_back(N->getOperand(3));
4670 Ops.push_back(N->getOperand(4));
4671 Ops.push_back(Chain);
4673 case NVPTXISD::Suld3DI64Zero:
4674 Opc = NVPTX::SULD_3D_I64_ZERO;
4675 Ops.push_back(TexHandle);
4676 Ops.push_back(N->getOperand(2));
4677 Ops.push_back(N->getOperand(3));
4678 Ops.push_back(N->getOperand(4));
4679 Ops.push_back(Chain);
4681 case NVPTXISD::Suld3DV2I8Zero:
4682 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4683 Ops.push_back(TexHandle);
4684 Ops.push_back(N->getOperand(2));
4685 Ops.push_back(N->getOperand(3));
4686 Ops.push_back(N->getOperand(4));
4687 Ops.push_back(Chain);
4689 case NVPTXISD::Suld3DV2I16Zero:
4690 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4691 Ops.push_back(TexHandle);
4692 Ops.push_back(N->getOperand(2));
4693 Ops.push_back(N->getOperand(3));
4694 Ops.push_back(N->getOperand(4));
4695 Ops.push_back(Chain);
4697 case NVPTXISD::Suld3DV2I32Zero:
4698 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4699 Ops.push_back(TexHandle);
4700 Ops.push_back(N->getOperand(2));
4701 Ops.push_back(N->getOperand(3));
4702 Ops.push_back(N->getOperand(4));
4703 Ops.push_back(Chain);
4705 case NVPTXISD::Suld3DV2I64Zero:
4706 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4707 Ops.push_back(TexHandle);
4708 Ops.push_back(N->getOperand(2));
4709 Ops.push_back(N->getOperand(3));
4710 Ops.push_back(N->getOperand(4));
4711 Ops.push_back(Chain);
4713 case NVPTXISD::Suld3DV4I8Zero:
4714 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4715 Ops.push_back(TexHandle);
4716 Ops.push_back(N->getOperand(2));
4717 Ops.push_back(N->getOperand(3));
4718 Ops.push_back(N->getOperand(4));
4719 Ops.push_back(Chain);
4721 case NVPTXISD::Suld3DV4I16Zero:
4722 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4723 Ops.push_back(TexHandle);
4724 Ops.push_back(N->getOperand(2));
4725 Ops.push_back(N->getOperand(3));
4726 Ops.push_back(N->getOperand(4));
4727 Ops.push_back(Chain);
4729 case NVPTXISD::Suld3DV4I32Zero:
4730 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4731 Ops.push_back(TexHandle);
4732 Ops.push_back(N->getOperand(2));
4733 Ops.push_back(N->getOperand(3));
4734 Ops.push_back(N->getOperand(4));
4735 Ops.push_back(Chain);
4738 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4743 /// SelectBFE - Look for instruction sequences that can be made more efficient
4744 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// N is the node being selected; its operands 0 and 1 are read as LHS and RHS.
/// Three shapes are examined, keyed on N's opcode and on LHS's opcode:
///   - (and (srl/sra val, C), mask)   where mask satisfies isMask_64
///   - (srl/sra (and val, mask), C)   where mask satisfies isMask_64 or
///                                    isShiftedMask_64
///   - (srl/sra (shl val, NN), MM)
/// For the first two shapes an i32 target constant is built for the start bit
/// and another for the length; the node is finally created with
/// CurDAG->getMachineNode using N's value-type list.
///
/// NOTE(review): this listing appears to have lost a number of short lines
/// (local declarations, bail-out returns, `else` arms, closing braces). The
/// comments here describe only what is visible; confirm against the complete
/// source before relying on them.
4745 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4746 SDValue LHS = N->getOperand(0);
4747 SDValue RHS = N->getOperand(1);
4751 bool IsSigned = false;
// ---- Shape 1: N is an AND ----
4753 if (N->getOpcode() == ISD::AND) {
4754 // Canonicalize the operands
4755 // We want 'and %val, %mask'
4756 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4757 std::swap(LHS, RHS);
// Mask is null when RHS is not a constant (dyn_cast).
4760 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4762 // We need a constant mask on the RHS of the AND
4766 // Extract the mask bits
4767 uint64_t MaskVal = Mask->getZExtValue();
4768 if (!isMask_64(MaskVal)) {
4769 // We *could* handle shifted masks here, but doing so would require an
4770 // 'and' operation to fix up the low-order bits so we would trade
4771 // shr+and for bfe+and, which has the same throughput
4775 // How many bits are in our mask?
4776 uint64_t NumBits = countTrailingOnes(MaskVal);
4777 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4779 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4780 // We have a 'srl/and' pair, extract the effective start bit and length
4781 Val = LHS.getNode()->getOperand(0);
4782 Start = LHS.getNode()->getOperand(1);
4783 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4785 uint64_t StartVal = StartConst->getZExtValue();
4786 // How many "good" bits do we have left? "good" is defined here as bits
4787 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of Start (the shift-amount
// operand assigned above), not of Val (the value being shifted) - confirm
// that the two types always have the same width, or that this is intended.
4788 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4789 if (NumBits > GoodBits) {
4790 // Do not handle the case where bits have been shifted in. In theory
4791 // we could handle this, but the cost is likely higher than just
4792 // emitting the srl/and pair.
// Replace the original shift-amount operand with an i32 target constant.
4795 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4797 // Do not handle the case where the shift amount (can be zero if no srl
4798 // was found) is not constant. We could handle this case, but it would
4799 // require run-time logic that would be more expensive than just
4800 // emitting the srl/and pair.
4804 // Do not handle the case where the LHS of the and is not a shift. While
4805 // it would be trivial to handle this case, it would just transform
4806 // 'and' -> 'bfe', but 'and' has higher-throughput.
// ---- Shapes 2 and 3: N is a right shift (logical or arithmetic) ----
4809 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4810 if (LHS->getOpcode() == ISD::AND) {
4811 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4813 // Shift amount must be constant
4817 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4819 SDValue AndLHS = LHS->getOperand(0);
4820 SDValue AndRHS = LHS->getOperand(1);
4822 // Canonicalize the AND to have the mask on the RHS
4823 if (isa<ConstantSDNode>(AndLHS)) {
4824 std::swap(AndLHS, AndRHS);
4827 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4829 // Mask must be constant
4833 uint64_t MaskVal = MaskCnst->getZExtValue();
4836 if (isMask_64(MaskVal)) {
4838 // The number of bits in the result bitfield will be the number of
4839 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): no guard is visible here ensuring ShiftAmt does not exceed
// the number of trailing ones; if NumBits is an unsigned type the
// subtraction would wrap - confirm against the declaration of NumBits.
4840 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4841 } else if (isShiftedMask_64(MaskVal)) {
4842 NumZeros = countTrailingZeros(MaskVal);
4843 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4844 // The number of bits in the result bitfield will be the number of
4845 // trailing zeros plus the number of set bits in the mask minus the
4846 // number of bits we shift off
4847 NumBits = NumZeros + NumOnes - ShiftAmt;
4849 // This is not a mask we can handle
4853 if (ShiftAmt < NumZeros) {
4854 // Handling this case would require extra logic that would make this
4855 // transformation non-profitable
// The extracted field starts at the shift amount and is NumBits wide.
4860 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4861 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4862 } else if (LHS->getOpcode() == ISD::SHL) {
4863 // Here, we have a pattern like:
4865 // (sra (shl val, NN), MM)
4867 // (srl (shl val, NN), MM)
4869 // If MM >= NN, we can efficiently optimize this with bfe
4870 Val = LHS->getOperand(0);
4872 SDValue ShlRHS = LHS->getOperand(1);
4873 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4875 // Shift amount must be constant
4878 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4880 SDValue ShrRHS = RHS;
4881 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4883 // Shift amount must be constant
4886 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4888 // To avoid extra codegen and be profitable, we need Outer >= Inner
4889 if (OuterShiftAmt < InnerShiftAmt) {
4893 // If the outer shift is more than the type size, we have no bitfield to
4894 // extract (since we also check that the inner shift is <= the outer shift
4895 // then this also implies that the inner shift is < the type size)
4896 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// Two i32 target constants follow: (outer - inner shift) and
// (width of Val's type - outer shift). NOTE(review): the assignment targets
// are not visible in this listing - presumably Start and Len respectively.
4901 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4903 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4904 OuterShiftAmt, MVT::i32);
4906 if (N->getOpcode() == ISD::SRA) {
4907 // If we have an arithmetic right shift, we need to use the signed bfe
4922 // For the BFE operations we form here from "and" and "srl", always use the
4923 // unsigned variants.
// NOTE(review): both signed (BFE_S*) and unsigned (BFE_U*) opcodes are
// assigned below, so the "always unsigned" remark above looks stale. The
// condition choosing between them is not visible here - presumably IsSigned.
// The opcode family is chosen by Val's type: i32 or i64.
4924 if (Val.getValueType() == MVT::i32) {
4926 Opc = NVPTX::BFE_S32rii;
4928 Opc = NVPTX::BFE_U32rii;
4930 } else if (Val.getValueType() == MVT::i64) {
4932 Opc = NVPTX::BFE_S64rii;
4934 Opc = NVPTX::BFE_U64rii;
4937 // We cannot handle this type
// Build the machine node with the chosen opcode, N's debug location and
// N's value-type list.
4946 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4951 // SelectDirectAddr - Match a direct address for DAG.
4952 // A direct address could be a globaladdress or externalsymbol.
//
// N is the candidate address node; Address is an output written on a match.
// The visible code inspects three forms of N:
//   - TargetGlobalAddress / TargetExternalSymbol
//   - NVPTXISD::Wrapper, in which case Address is set to the wrapped operand 0
//   - an INTRINSIC_WO_CHAIN node for nvvm_ptr_gen_to_param whose operand 1 is
//     an NVPTXISD::MoveParam; the match then recurses on MoveParam's operand 0
// NOTE(review): the statements executed for the first form and the return
// values of the non-recursive paths are not visible in this listing.
4953 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4954 // Return true if TGA or ES.
4955 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4956 N.getOpcode() == ISD::TargetExternalSymbol) {
// A Wrapper node: the address is its first operand.
4960 if (N.getOpcode() == NVPTXISD::Wrapper) {
4961 Address = N.getOperand(0);
// For an intrinsic without chain, operand 0 holds the intrinsic ID.
4964 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4965 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4966 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4967 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4968 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match an address of the form (add <direct address>, C).
// When Addr is an ISD::ADD whose operand 1 is a constant and whose operand 0
// is accepted by SelectDirectAddr, Base receives the direct address and
// Offset receives the constant (zero-extended) as a target constant of type
// mvt. OpNode is not used in the visible lines.
// NOTE(review): the return statements are not visible in this listing.
4974 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4975 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4976 if (Addr.getOpcode() == ISD::ADD) {
4977 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4978 SDValue base = Addr.getOperand(0);
// SelectDirectAddr writes the matched address straight into Base.
4979 if (SelectDirectAddr(base, Base)) {
4980 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRsi - Forwards to SelectADDRsi_imp, requesting MVT::i32 for the
// offset constant.
4989 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4990 SDValue &Base, SDValue &Offset) {
4991 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - Forwards to SelectADDRsi_imp, requesting MVT::i64 for the
// offset constant.
4995 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4996 SDValue &Base, SDValue &Offset) {
4997 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match a base + constant-offset address.
// Visible cases:
//   - Addr is a frame index: Base becomes the target frame index and Offset
//     a zero target constant, both of type mvt.
//   - Addr is a TargetExternalSymbol / TargetGlobalAddress: rejected.
//   - Addr is (add X, C) with constant C: Base is the target frame index when
//     X is a frame index, otherwise X itself; Offset is C (zero-extended) as a
//     target constant of type mvt.
// OpNode is not used in the visible lines.
// NOTE(review): several return statements and an `else` are not visible in
// this listing; confirm the exact control flow against the complete source.
5001 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5002 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5003 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5004 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5005 Offset = CurDAG->getTargetConstant(0, mvt);
5008 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5009 Addr.getOpcode() == ISD::TargetGlobalAddress)
5010 return false; // direct calls.
5012 if (Addr.getOpcode() == ISD::ADD) {
// NOTE(review): Addr is passed as SelectDirectAddr's output argument, so a
// successful match may overwrite the local Addr. What happens on a match is
// not visible here - presumably this form is rejected.
5013 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5016 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5017 if (FrameIndexSDNode *FIN =
5018 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5019 // Constant offset from frame ref.
5020 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5022 Base = Addr.getOperand(0);
5023 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// SelectADDRri - Forwards to SelectADDRri_imp, requesting MVT::i32 for the
// frame index and offset constants.
5031 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5032 SDValue &Base, SDValue &Offset) {
5033 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - Forwards to SelectADDRri_imp, requesting MVT::i64 for the
// frame index and offset constants.
5037 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5038 SDValue &Base, SDValue &Offset) {
5039 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Test whether the memory access of N refers to
// address space spN. When N is a MemSDNode, the IR value attached to its
// memory operand is fetched; if that value has pointer type, the result is
// whether the pointer's address space equals spN.
// NOTE(review): the outcome of the pseudo-value special case and the
// handling of a null Src are not visible in this listing.
5042 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5043 unsigned int spN) const {
5044 const Value *Src = nullptr;
5045 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
// Special case: address space 0 requested and the memory operand carries a
// pseudo source value rather than an IR value.
5046 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5048 Src = mN->getMemOperand()->getValue();
// Compare the address space of the pointer type against the request.
5052 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5053 return (PT->getAddressSpace() == spN);
5057 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5058 /// inline asm expressions.
5059 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5060 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
5062 switch (ConstraintCode) {
5066 if (SelectDirectAddr(Op, Op0)) {
5067 OutOps.push_back(Op0);
5068 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5071 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5072 OutOps.push_back(Op0);
5073 OutOps.push_back(Op1);