1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line options consulted by the NVPTX instruction selector.
// The cl::desc strings below are the help text shown for each option.
//
// -nvptx-prec-divf32: integer level choosing the f32 division flavour
// (0 = div.approx, 1 = div.full, 2 = div.rnd, per the description).
// NOTE(review): "Specifies" in this help text looks like a typo for
// "Specific" (compare the two descriptions further down) - confirm before
// changing, since it is user-visible text.
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
// -nvptx-prec-sqrtf32: chooses between sqrt.approx (0) and sqrt.rn (1).
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// -nvptx-f32ftz: requests flushing f32 subnormals to sign-preserving zero.
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// \param TM        target machine forwarded to the NVPTXDAGToDAGISel
///                  constructor.
/// \param OptLevel  optimization level forwarded to the same constructor.
/// \returns a pointer to a freshly allocated NVPTXDAGToDAGISel.
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Constructs the selector: passes \p tm and \p OptLevel to the
/// SelectionDAGISel base class and initializes the TM member from \p tm.
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel), TM(tm) {
// doMulWide is true only when OptLevel is above zero.
54 doMulWide = (OptLevel > 0);
/// Per-function entry point. Stores the address of \p MF's subtarget, cast to
/// NVPTXSubtarget, in the Subtarget member, then returns whatever the
/// SelectionDAGISel base implementation returns for \p MF.
57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
// The static_cast takes for granted that MF's subtarget is an NVPTXSubtarget.
58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59 return SelectionDAGISel::runOnMachineFunction(MF);
/// Returns the f32 division level to use. The level meanings are those listed
/// in the nvptx-prec-divf32 option description (0 div.approx, 1 div.full,
/// 2 div.rnd).
/// NOTE(review): the return statements of this function are not visible in
/// this listing; the notes below restate only the visible conditions and the
/// original comments.
62 int NVPTXDAGToDAGISel::getDivF32Level() const {
// getNumOccurrences() > 0 means the option appeared on the command line.
63 if (UsePrecDivF32.getNumOccurrences() > 0) {
64 // If nvptx-prec-divf32=N is used on the command-line, always honor it
67 // Otherwise, use div.approx if fast math is enabled
68 if (TM.Options.UnsafeFPMath)
/// Decides whether the precise f32 square root should be used.
/// \returns the value of the nvptx-prec-sqrtf32 option when it was given on
/// the command line; otherwise the negation of TM.Options.UnsafeFPMath.
75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
// getNumOccurrences() > 0 means the option appeared on the command line.
76 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78 return UsePrecSqrtF32;
80 // Otherwise, use sqrt.approx if fast math is enabled
// i.e. precise sqrt is reported exactly when UnsafeFPMath is off.
81 return !TM.Options.UnsafeFPMath;
/// Decides whether f32 flush-to-zero should be used for the current function.
/// The command-line option nvptx-f32ftz is checked first, then a function
/// attribute of the same name.
/// NOTE(review): the return for the command-line case and the final fallback
/// return are not visible in this listing.
85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
86 if (FtzEnabled.getNumOccurrences() > 0) {
87 // If nvptx-f32ftz is used on the command-line, always honor it
90 const Function *F = MF->getFunction();
91 // Otherwise, check for an nvptx-f32ftz attribute on the function
92 if (F->hasFnAttribute("nvptx-f32ftz"))
// The attribute enables FTZ only when its string value is exactly "true".
93 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
/// Reports whether FMA is allowed for the current function by delegating to
/// the subtarget's NVPTXTargetLowering::allowFMA with the current machine
/// function and optimization level.
99 bool NVPTXDAGToDAGISel::allowFMA() const {
100 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
101 return TL->allowFMA(*MF, OptLevel);
104 /// Select - Select instructions not customized! Used for
105 /// expanded, promoted and normal instructions.
///
/// Dispatches on N->getOpcode(): each group of opcodes below is routed to one
/// hand-written Select* helper whose result is stored in ResNode. The visible
/// tail of the function hands the node to SelectCode(N).
/// NOTE(review): this listing skips some original lines (the embedded line
/// numbers are not contiguous), so a few case labels, the break statements and
/// the code between the switch and the final return are not visible here. The
/// comments below describe only what is shown.
106 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
// Nodes that already carry a machine opcode need no further selection.
108 if (N->isMachineOpcode()) {
110 return nullptr; // Already selected.
113 SDNode *ResNode = nullptr;
114 switch (N->getOpcode()) {
// NOTE(review): the case labels guarding the next two calls are not visible.
116 ResNode = SelectLoad(N);
119 ResNode = SelectStore(N);
// LoadV2 / LoadV4 nodes.
121 case NVPTXISD::LoadV2:
122 case NVPTXISD::LoadV4:
123 ResNode = SelectLoadVector(N);
// LDG / LDU V2 and V4 nodes.
125 case NVPTXISD::LDGV2:
126 case NVPTXISD::LDGV4:
127 case NVPTXISD::LDUV2:
128 case NVPTXISD::LDUV4:
129 ResNode = SelectLDGLDU(N);
// StoreV2 / StoreV4 nodes.
131 case NVPTXISD::StoreV2:
132 case NVPTXISD::StoreV4:
133 ResNode = SelectStoreVector(N);
// LoadParam nodes (scalar, V2, V4).
135 case NVPTXISD::LoadParam:
136 case NVPTXISD::LoadParamV2:
137 case NVPTXISD::LoadParamV4:
138 ResNode = SelectLoadParam(N);
// StoreRetval nodes (scalar, V2, V4).
140 case NVPTXISD::StoreRetval:
141 case NVPTXISD::StoreRetvalV2:
142 case NVPTXISD::StoreRetvalV4:
143 ResNode = SelectStoreRetval(N);
// StoreParam nodes (scalar, V2, V4, S32, U32).
145 case NVPTXISD::StoreParam:
146 case NVPTXISD::StoreParamV2:
147 case NVPTXISD::StoreParamV4:
148 case NVPTXISD::StoreParamS32:
149 case NVPTXISD::StoreParamU32:
150 ResNode = SelectStoreParam(N);
// Generic intrinsic nodes, without and with a chain operand.
152 case ISD::INTRINSIC_WO_CHAIN:
153 ResNode = SelectIntrinsicNoChain(N);
155 case ISD::INTRINSIC_W_CHAIN:
156 ResNode = SelectIntrinsicChain(N);
// Every Tex* / Tld4* opcode, including the *Unified* variants, is handled by
// SelectTextureIntrinsic.
158 case NVPTXISD::Tex1DFloatS32:
159 case NVPTXISD::Tex1DFloatFloat:
160 case NVPTXISD::Tex1DFloatFloatLevel:
161 case NVPTXISD::Tex1DFloatFloatGrad:
162 case NVPTXISD::Tex1DS32S32:
163 case NVPTXISD::Tex1DS32Float:
164 case NVPTXISD::Tex1DS32FloatLevel:
165 case NVPTXISD::Tex1DS32FloatGrad:
166 case NVPTXISD::Tex1DU32S32:
167 case NVPTXISD::Tex1DU32Float:
168 case NVPTXISD::Tex1DU32FloatLevel:
169 case NVPTXISD::Tex1DU32FloatGrad:
170 case NVPTXISD::Tex1DArrayFloatS32:
171 case NVPTXISD::Tex1DArrayFloatFloat:
172 case NVPTXISD::Tex1DArrayFloatFloatLevel:
173 case NVPTXISD::Tex1DArrayFloatFloatGrad:
174 case NVPTXISD::Tex1DArrayS32S32:
175 case NVPTXISD::Tex1DArrayS32Float:
176 case NVPTXISD::Tex1DArrayS32FloatLevel:
177 case NVPTXISD::Tex1DArrayS32FloatGrad:
178 case NVPTXISD::Tex1DArrayU32S32:
179 case NVPTXISD::Tex1DArrayU32Float:
180 case NVPTXISD::Tex1DArrayU32FloatLevel:
181 case NVPTXISD::Tex1DArrayU32FloatGrad:
182 case NVPTXISD::Tex2DFloatS32:
183 case NVPTXISD::Tex2DFloatFloat:
184 case NVPTXISD::Tex2DFloatFloatLevel:
185 case NVPTXISD::Tex2DFloatFloatGrad:
186 case NVPTXISD::Tex2DS32S32:
187 case NVPTXISD::Tex2DS32Float:
188 case NVPTXISD::Tex2DS32FloatLevel:
189 case NVPTXISD::Tex2DS32FloatGrad:
190 case NVPTXISD::Tex2DU32S32:
191 case NVPTXISD::Tex2DU32Float:
192 case NVPTXISD::Tex2DU32FloatLevel:
193 case NVPTXISD::Tex2DU32FloatGrad:
194 case NVPTXISD::Tex2DArrayFloatS32:
195 case NVPTXISD::Tex2DArrayFloatFloat:
196 case NVPTXISD::Tex2DArrayFloatFloatLevel:
197 case NVPTXISD::Tex2DArrayFloatFloatGrad:
198 case NVPTXISD::Tex2DArrayS32S32:
199 case NVPTXISD::Tex2DArrayS32Float:
200 case NVPTXISD::Tex2DArrayS32FloatLevel:
201 case NVPTXISD::Tex2DArrayS32FloatGrad:
202 case NVPTXISD::Tex2DArrayU32S32:
203 case NVPTXISD::Tex2DArrayU32Float:
204 case NVPTXISD::Tex2DArrayU32FloatLevel:
205 case NVPTXISD::Tex2DArrayU32FloatGrad:
206 case NVPTXISD::Tex3DFloatS32:
207 case NVPTXISD::Tex3DFloatFloat:
208 case NVPTXISD::Tex3DFloatFloatLevel:
209 case NVPTXISD::Tex3DFloatFloatGrad:
210 case NVPTXISD::Tex3DS32S32:
211 case NVPTXISD::Tex3DS32Float:
212 case NVPTXISD::Tex3DS32FloatLevel:
213 case NVPTXISD::Tex3DS32FloatGrad:
214 case NVPTXISD::Tex3DU32S32:
215 case NVPTXISD::Tex3DU32Float:
216 case NVPTXISD::Tex3DU32FloatLevel:
217 case NVPTXISD::Tex3DU32FloatGrad:
218 case NVPTXISD::TexCubeFloatFloat:
219 case NVPTXISD::TexCubeFloatFloatLevel:
220 case NVPTXISD::TexCubeS32Float:
221 case NVPTXISD::TexCubeS32FloatLevel:
222 case NVPTXISD::TexCubeU32Float:
223 case NVPTXISD::TexCubeU32FloatLevel:
224 case NVPTXISD::TexCubeArrayFloatFloat:
225 case NVPTXISD::TexCubeArrayFloatFloatLevel:
226 case NVPTXISD::TexCubeArrayS32Float:
227 case NVPTXISD::TexCubeArrayS32FloatLevel:
228 case NVPTXISD::TexCubeArrayU32Float:
229 case NVPTXISD::TexCubeArrayU32FloatLevel:
230 case NVPTXISD::Tld4R2DFloatFloat:
231 case NVPTXISD::Tld4G2DFloatFloat:
232 case NVPTXISD::Tld4B2DFloatFloat:
233 case NVPTXISD::Tld4A2DFloatFloat:
234 case NVPTXISD::Tld4R2DS64Float:
235 case NVPTXISD::Tld4G2DS64Float:
236 case NVPTXISD::Tld4B2DS64Float:
237 case NVPTXISD::Tld4A2DS64Float:
238 case NVPTXISD::Tld4R2DU64Float:
239 case NVPTXISD::Tld4G2DU64Float:
240 case NVPTXISD::Tld4B2DU64Float:
241 case NVPTXISD::Tld4A2DU64Float:
242 case NVPTXISD::TexUnified1DFloatS32:
243 case NVPTXISD::TexUnified1DFloatFloat:
244 case NVPTXISD::TexUnified1DFloatFloatLevel:
245 case NVPTXISD::TexUnified1DFloatFloatGrad:
246 case NVPTXISD::TexUnified1DS32S32:
247 case NVPTXISD::TexUnified1DS32Float:
248 case NVPTXISD::TexUnified1DS32FloatLevel:
249 case NVPTXISD::TexUnified1DS32FloatGrad:
250 case NVPTXISD::TexUnified1DU32S32:
251 case NVPTXISD::TexUnified1DU32Float:
252 case NVPTXISD::TexUnified1DU32FloatLevel:
253 case NVPTXISD::TexUnified1DU32FloatGrad:
254 case NVPTXISD::TexUnified1DArrayFloatS32:
255 case NVPTXISD::TexUnified1DArrayFloatFloat:
256 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
257 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
258 case NVPTXISD::TexUnified1DArrayS32S32:
259 case NVPTXISD::TexUnified1DArrayS32Float:
260 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
261 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
262 case NVPTXISD::TexUnified1DArrayU32S32:
263 case NVPTXISD::TexUnified1DArrayU32Float:
264 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
265 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
266 case NVPTXISD::TexUnified2DFloatS32:
267 case NVPTXISD::TexUnified2DFloatFloat:
268 case NVPTXISD::TexUnified2DFloatFloatLevel:
269 case NVPTXISD::TexUnified2DFloatFloatGrad:
270 case NVPTXISD::TexUnified2DS32S32:
271 case NVPTXISD::TexUnified2DS32Float:
272 case NVPTXISD::TexUnified2DS32FloatLevel:
273 case NVPTXISD::TexUnified2DS32FloatGrad:
274 case NVPTXISD::TexUnified2DU32S32:
275 case NVPTXISD::TexUnified2DU32Float:
276 case NVPTXISD::TexUnified2DU32FloatLevel:
277 case NVPTXISD::TexUnified2DU32FloatGrad:
278 case NVPTXISD::TexUnified2DArrayFloatS32:
279 case NVPTXISD::TexUnified2DArrayFloatFloat:
280 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
281 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
282 case NVPTXISD::TexUnified2DArrayS32S32:
283 case NVPTXISD::TexUnified2DArrayS32Float:
284 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
285 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
286 case NVPTXISD::TexUnified2DArrayU32S32:
287 case NVPTXISD::TexUnified2DArrayU32Float:
288 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
289 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
290 case NVPTXISD::TexUnified3DFloatS32:
291 case NVPTXISD::TexUnified3DFloatFloat:
292 case NVPTXISD::TexUnified3DFloatFloatLevel:
293 case NVPTXISD::TexUnified3DFloatFloatGrad:
294 case NVPTXISD::TexUnified3DS32S32:
295 case NVPTXISD::TexUnified3DS32Float:
296 case NVPTXISD::TexUnified3DS32FloatLevel:
297 case NVPTXISD::TexUnified3DS32FloatGrad:
298 case NVPTXISD::TexUnified3DU32S32:
299 case NVPTXISD::TexUnified3DU32Float:
300 case NVPTXISD::TexUnified3DU32FloatLevel:
301 case NVPTXISD::TexUnified3DU32FloatGrad:
302 case NVPTXISD::TexUnifiedCubeFloatFloat:
303 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
304 case NVPTXISD::TexUnifiedCubeS32Float:
305 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
306 case NVPTXISD::TexUnifiedCubeU32Float:
307 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayS32Float:
311 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayU32Float:
313 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
314 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
315 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
316 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedR2DS64Float:
319 case NVPTXISD::Tld4UnifiedG2DS64Float:
320 case NVPTXISD::Tld4UnifiedB2DS64Float:
321 case NVPTXISD::Tld4UnifiedA2DS64Float:
322 case NVPTXISD::Tld4UnifiedR2DU64Float:
323 case NVPTXISD::Tld4UnifiedG2DU64Float:
324 case NVPTXISD::Tld4UnifiedB2DU64Float:
325 case NVPTXISD::Tld4UnifiedA2DU64Float:
326 ResNode = SelectTextureIntrinsic(N);
// Every Suld* opcode (the Clamp, Trap and Zero suffixed families) is handled
// by SelectSurfaceIntrinsic.
328 case NVPTXISD::Suld1DI8Clamp:
329 case NVPTXISD::Suld1DI16Clamp:
330 case NVPTXISD::Suld1DI32Clamp:
331 case NVPTXISD::Suld1DI64Clamp:
332 case NVPTXISD::Suld1DV2I8Clamp:
333 case NVPTXISD::Suld1DV2I16Clamp:
334 case NVPTXISD::Suld1DV2I32Clamp:
335 case NVPTXISD::Suld1DV2I64Clamp:
336 case NVPTXISD::Suld1DV4I8Clamp:
337 case NVPTXISD::Suld1DV4I16Clamp:
338 case NVPTXISD::Suld1DV4I32Clamp:
339 case NVPTXISD::Suld1DArrayI8Clamp:
340 case NVPTXISD::Suld1DArrayI16Clamp:
341 case NVPTXISD::Suld1DArrayI32Clamp:
342 case NVPTXISD::Suld1DArrayI64Clamp:
343 case NVPTXISD::Suld1DArrayV2I8Clamp:
344 case NVPTXISD::Suld1DArrayV2I16Clamp:
345 case NVPTXISD::Suld1DArrayV2I32Clamp:
346 case NVPTXISD::Suld1DArrayV2I64Clamp:
347 case NVPTXISD::Suld1DArrayV4I8Clamp:
348 case NVPTXISD::Suld1DArrayV4I16Clamp:
349 case NVPTXISD::Suld1DArrayV4I32Clamp:
350 case NVPTXISD::Suld2DI8Clamp:
351 case NVPTXISD::Suld2DI16Clamp:
352 case NVPTXISD::Suld2DI32Clamp:
353 case NVPTXISD::Suld2DI64Clamp:
354 case NVPTXISD::Suld2DV2I8Clamp:
355 case NVPTXISD::Suld2DV2I16Clamp:
356 case NVPTXISD::Suld2DV2I32Clamp:
357 case NVPTXISD::Suld2DV2I64Clamp:
358 case NVPTXISD::Suld2DV4I8Clamp:
359 case NVPTXISD::Suld2DV4I16Clamp:
360 case NVPTXISD::Suld2DV4I32Clamp:
361 case NVPTXISD::Suld2DArrayI8Clamp:
362 case NVPTXISD::Suld2DArrayI16Clamp:
363 case NVPTXISD::Suld2DArrayI32Clamp:
364 case NVPTXISD::Suld2DArrayI64Clamp:
365 case NVPTXISD::Suld2DArrayV2I8Clamp:
366 case NVPTXISD::Suld2DArrayV2I16Clamp:
367 case NVPTXISD::Suld2DArrayV2I32Clamp:
368 case NVPTXISD::Suld2DArrayV2I64Clamp:
369 case NVPTXISD::Suld2DArrayV4I8Clamp:
370 case NVPTXISD::Suld2DArrayV4I16Clamp:
371 case NVPTXISD::Suld2DArrayV4I32Clamp:
372 case NVPTXISD::Suld3DI8Clamp:
373 case NVPTXISD::Suld3DI16Clamp:
374 case NVPTXISD::Suld3DI32Clamp:
375 case NVPTXISD::Suld3DI64Clamp:
376 case NVPTXISD::Suld3DV2I8Clamp:
377 case NVPTXISD::Suld3DV2I16Clamp:
378 case NVPTXISD::Suld3DV2I32Clamp:
379 case NVPTXISD::Suld3DV2I64Clamp:
380 case NVPTXISD::Suld3DV4I8Clamp:
381 case NVPTXISD::Suld3DV4I16Clamp:
382 case NVPTXISD::Suld3DV4I32Clamp:
383 case NVPTXISD::Suld1DI8Trap:
384 case NVPTXISD::Suld1DI16Trap:
385 case NVPTXISD::Suld1DI32Trap:
386 case NVPTXISD::Suld1DI64Trap:
387 case NVPTXISD::Suld1DV2I8Trap:
388 case NVPTXISD::Suld1DV2I16Trap:
389 case NVPTXISD::Suld1DV2I32Trap:
390 case NVPTXISD::Suld1DV2I64Trap:
391 case NVPTXISD::Suld1DV4I8Trap:
392 case NVPTXISD::Suld1DV4I16Trap:
393 case NVPTXISD::Suld1DV4I32Trap:
394 case NVPTXISD::Suld1DArrayI8Trap:
395 case NVPTXISD::Suld1DArrayI16Trap:
396 case NVPTXISD::Suld1DArrayI32Trap:
397 case NVPTXISD::Suld1DArrayI64Trap:
398 case NVPTXISD::Suld1DArrayV2I8Trap:
399 case NVPTXISD::Suld1DArrayV2I16Trap:
400 case NVPTXISD::Suld1DArrayV2I32Trap:
401 case NVPTXISD::Suld1DArrayV2I64Trap:
402 case NVPTXISD::Suld1DArrayV4I8Trap:
403 case NVPTXISD::Suld1DArrayV4I16Trap:
404 case NVPTXISD::Suld1DArrayV4I32Trap:
405 case NVPTXISD::Suld2DI8Trap:
406 case NVPTXISD::Suld2DI16Trap:
407 case NVPTXISD::Suld2DI32Trap:
408 case NVPTXISD::Suld2DI64Trap:
409 case NVPTXISD::Suld2DV2I8Trap:
410 case NVPTXISD::Suld2DV2I16Trap:
411 case NVPTXISD::Suld2DV2I32Trap:
412 case NVPTXISD::Suld2DV2I64Trap:
413 case NVPTXISD::Suld2DV4I8Trap:
414 case NVPTXISD::Suld2DV4I16Trap:
415 case NVPTXISD::Suld2DV4I32Trap:
416 case NVPTXISD::Suld2DArrayI8Trap:
417 case NVPTXISD::Suld2DArrayI16Trap:
418 case NVPTXISD::Suld2DArrayI32Trap:
419 case NVPTXISD::Suld2DArrayI64Trap:
420 case NVPTXISD::Suld2DArrayV2I8Trap:
421 case NVPTXISD::Suld2DArrayV2I16Trap:
422 case NVPTXISD::Suld2DArrayV2I32Trap:
423 case NVPTXISD::Suld2DArrayV2I64Trap:
424 case NVPTXISD::Suld2DArrayV4I8Trap:
425 case NVPTXISD::Suld2DArrayV4I16Trap:
426 case NVPTXISD::Suld2DArrayV4I32Trap:
427 case NVPTXISD::Suld3DI8Trap:
428 case NVPTXISD::Suld3DI16Trap:
429 case NVPTXISD::Suld3DI32Trap:
430 case NVPTXISD::Suld3DI64Trap:
431 case NVPTXISD::Suld3DV2I8Trap:
432 case NVPTXISD::Suld3DV2I16Trap:
433 case NVPTXISD::Suld3DV2I32Trap:
434 case NVPTXISD::Suld3DV2I64Trap:
435 case NVPTXISD::Suld3DV4I8Trap:
436 case NVPTXISD::Suld3DV4I16Trap:
437 case NVPTXISD::Suld3DV4I32Trap:
438 case NVPTXISD::Suld1DI8Zero:
439 case NVPTXISD::Suld1DI16Zero:
440 case NVPTXISD::Suld1DI32Zero:
441 case NVPTXISD::Suld1DI64Zero:
442 case NVPTXISD::Suld1DV2I8Zero:
443 case NVPTXISD::Suld1DV2I16Zero:
444 case NVPTXISD::Suld1DV2I32Zero:
445 case NVPTXISD::Suld1DV2I64Zero:
446 case NVPTXISD::Suld1DV4I8Zero:
447 case NVPTXISD::Suld1DV4I16Zero:
448 case NVPTXISD::Suld1DV4I32Zero:
449 case NVPTXISD::Suld1DArrayI8Zero:
450 case NVPTXISD::Suld1DArrayI16Zero:
451 case NVPTXISD::Suld1DArrayI32Zero:
452 case NVPTXISD::Suld1DArrayI64Zero:
453 case NVPTXISD::Suld1DArrayV2I8Zero:
454 case NVPTXISD::Suld1DArrayV2I16Zero:
455 case NVPTXISD::Suld1DArrayV2I32Zero:
456 case NVPTXISD::Suld1DArrayV2I64Zero:
457 case NVPTXISD::Suld1DArrayV4I8Zero:
458 case NVPTXISD::Suld1DArrayV4I16Zero:
459 case NVPTXISD::Suld1DArrayV4I32Zero:
460 case NVPTXISD::Suld2DI8Zero:
461 case NVPTXISD::Suld2DI16Zero:
462 case NVPTXISD::Suld2DI32Zero:
463 case NVPTXISD::Suld2DI64Zero:
464 case NVPTXISD::Suld2DV2I8Zero:
465 case NVPTXISD::Suld2DV2I16Zero:
466 case NVPTXISD::Suld2DV2I32Zero:
467 case NVPTXISD::Suld2DV2I64Zero:
468 case NVPTXISD::Suld2DV4I8Zero:
469 case NVPTXISD::Suld2DV4I16Zero:
470 case NVPTXISD::Suld2DV4I32Zero:
471 case NVPTXISD::Suld2DArrayI8Zero:
472 case NVPTXISD::Suld2DArrayI16Zero:
473 case NVPTXISD::Suld2DArrayI32Zero:
474 case NVPTXISD::Suld2DArrayI64Zero:
475 case NVPTXISD::Suld2DArrayV2I8Zero:
476 case NVPTXISD::Suld2DArrayV2I16Zero:
477 case NVPTXISD::Suld2DArrayV2I32Zero:
478 case NVPTXISD::Suld2DArrayV2I64Zero:
479 case NVPTXISD::Suld2DArrayV4I8Zero:
480 case NVPTXISD::Suld2DArrayV4I16Zero:
481 case NVPTXISD::Suld2DArrayV4I32Zero:
482 case NVPTXISD::Suld3DI8Zero:
483 case NVPTXISD::Suld3DI16Zero:
484 case NVPTXISD::Suld3DI32Zero:
485 case NVPTXISD::Suld3DI64Zero:
486 case NVPTXISD::Suld3DV2I8Zero:
487 case NVPTXISD::Suld3DV2I16Zero:
488 case NVPTXISD::Suld3DV2I32Zero:
489 case NVPTXISD::Suld3DV2I64Zero:
490 case NVPTXISD::Suld3DV4I8Zero:
491 case NVPTXISD::Suld3DV4I16Zero:
492 case NVPTXISD::Suld3DV4I32Zero:
493 ResNode = SelectSurfaceIntrinsic(N);
// NOTE(review): the case labels guarding the SelectBFE call are not visible.
499 ResNode = SelectBFE(N);
// Address-space casts.
501 case ISD::ADDRSPACECAST:
502 ResNode = SelectAddrSpaceCast(N);
// Hand the node to SelectCode(N) and return its result.
// NOTE(review): how ResNode is consumed before this point is not visible.
509 return SelectCode(N);
/// Handles chained intrinsic nodes. The intrinsic ID is read from operand 1 as
/// a constant; the nvvm ldg/ldu global intrinsics (f, i and p variants) are
/// forwarded to SelectLDGLDU.
/// NOTE(review): the switch header and the handling of any other intrinsic ID
/// are not visible in this listing.
512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
513 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
517 case Intrinsic::nvvm_ldg_global_f:
518 case Intrinsic::nvvm_ldg_global_i:
519 case Intrinsic::nvvm_ldg_global_p:
520 case Intrinsic::nvvm_ldu_global_f:
521 case Intrinsic::nvvm_ldu_global_i:
522 case Intrinsic::nvvm_ldu_global_p:
523 return SelectLDGLDU(N);
/// Maps the memory operand of \p N to an NVPTX::PTXLdStInstCode address-space
/// code. The IR value attached to N's memory operand is inspected; when its
/// type is a pointer type, the pointer's address space picks the code through
/// the switch below. GENERIC is returned when nothing more specific applies.
/// NOTE(review): the condition guarding the first GENERIC return is not
/// visible in this listing (presumably a missing source value - confirm).
527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
528 const Value *Src = N->getMemOperand()->getValue();
531 return NVPTX::PTXLdStInstCode::GENERIC;
533 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
// One-to-one translation from the llvm::ADDRESS_SPACE_* constant to the
// PTXLdStInstCode value of the matching name.
534 switch (PT->getAddressSpace()) {
535 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
536 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
537 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
538 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
539 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
540 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fallback when no case above returned.
544 return NVPTX::PTXLdStInstCode::GENERIC;
/// Handles chainless intrinsic nodes. The intrinsic ID is read from operand 0
/// as a constant; nvvm_texsurf_handle_internal is forwarded to
/// SelectTexSurfHandle.
/// NOTE(review): the switch header and the handling of any other intrinsic ID
/// are not visible in this listing.
547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
548 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
552 case Intrinsic::nvvm_texsurf_handle_internal:
553 return SelectTexSurfHandle(N);
/// Builds an NVPTX::texsurf_handles machine node with an i64 result for the
/// texture/surface handle intrinsic \p N.
/// NOTE(review): the trailing arguments of the getMachineNode call are not
/// visible in this listing.
557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
558 // Op 0 is the intrinsic ID
// Operand 1 is a wrapper node; its operand 0 is taken as GlobalVal.
559 SDValue Wrapper = N->getOperand(1);
560 SDValue GlobalVal = Wrapper.getOperand(0);
561 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
/// Selects a cvta-style machine node for the address-space cast \p N.
/// Two directions are handled:
///  - destination is generic: a cvta_<space>_yes opcode chosen by the source
///    address space;
///  - otherwise: the source must be address space 0 and a cvta_to_<space>_yes
///    opcode is chosen by the destination address space.
/// The _64 opcode variants are used when TM.is64Bit() is true. Unsupported
/// address spaces are reported through report_fatal_error.
/// \returns the new machine node, which takes the cast's source as its only
/// operand and produces N's result type.
565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
566 SDValue Src = N->getOperand(0);
567 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
568 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
569 unsigned DstAddrSpace = CastN->getDestAddressSpace();
571 assert(SrcAddrSpace != DstAddrSpace &&
572 "addrspacecast must be between different address spaces");
574 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
575 // Specific to generic
// NOTE(review): the declaration of Opc is not visible in this listing.
577 switch (SrcAddrSpace) {
578 default: report_fatal_error("Bad address space in addrspacecast");
579 case ADDRESS_SPACE_GLOBAL:
580 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
582 case ADDRESS_SPACE_SHARED:
583 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
585 case ADDRESS_SPACE_CONST:
586 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
588 case ADDRESS_SPACE_LOCAL:
589 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
592 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
594 // Generic to specific
// A non-zero source address space here is rejected as a fatal error.
595 if (SrcAddrSpace != 0)
596 report_fatal_error("Cannot cast between two non-generic address spaces");
598 switch (DstAddrSpace) {
599 default: report_fatal_error("Bad address space in addrspacecast");
600 case ADDRESS_SPACE_GLOBAL:
601 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
602 : NVPTX::cvta_to_global_yes;
604 case ADDRESS_SPACE_SHARED:
605 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
606 : NVPTX::cvta_to_shared_yes;
608 case ADDRESS_SPACE_CONST:
610 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
612 case ADDRESS_SPACE_LOCAL:
614 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
617 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
/// Selects an NVPTX LD_* machine node for the load node \p N.
///
/// The new node's operands are, in order: isVolatile, codeAddrSpace, vecType,
/// fromType, fromTypeWidth (all as i32 immediates), then the address
/// operand(s), then the chain. The opcode suffix follows the addressing form
/// that matched: _avar (SelectDirectAddr), _asi (SelectADDRsi/si64),
/// _ari / _ari_64 (SelectADDRri/ri64), and _areg / _areg_64 otherwise.
/// N's memory operand is copied onto the new node at the end.
/// NOTE(review): this listing skips some original lines, so the early-exit
/// statements, the switches over the target type that pick each opcode, and
/// the final return are not visible; comments describe only what is shown.
621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
623 LoadSDNode *LD = cast<LoadSDNode>(N);
624 EVT LoadedVT = LD->getMemoryVT();
625 SDNode *NVPTXLD = nullptr;
627 // do not support pre/post inc/dec
// Only simple (MVT-representable) memory types are handled below.
631 if (!LoadedVT.isSimple())
634 // Address Space Setting
635 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
638 // - .volatile is only available for .global and .shared
639 bool isVolatile = LD->isVolatile();
// True when the address space is none of GLOBAL, SHARED or GENERIC; the
// statement this condition guards is not visible here.
640 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
641 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
642 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity: Scalar by default, V2 or V4 for vector memory types (the
// element-count checks that choose between them are not visible here).
646 MVT SimpleVT = LoadedVT.getSimpleVT();
647 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
648 if (SimpleVT.isVector()) {
649 unsigned num = SimpleVT.getVectorNumElements();
651 vecType = NVPTX::PTXLdStInstCode::V2;
653 vecType = NVPTX::PTXLdStInstCode::V4;
658 // Type Setting: fromType + fromTypeWidth
660 // Sign : ISD::SEXTLOAD
661 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
663 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
664 MVT ScalarVT = SimpleVT.getScalarType();
665 // Read at least 8 bits (predicates are stored as 8-bit values)
666 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
667 unsigned int fromType;
// Sign-extending loads are Signed, floating-point scalars are Float, and
// everything else is Unsigned.
668 if ((LD->getExtensionType() == ISD::SEXTLOAD))
669 fromType = NVPTX::PTXLdStInstCode::Signed;
670 else if (ScalarVT.isFloatingPoint())
671 fromType = NVPTX::PTXLdStInstCode::Float;
673 fromType = NVPTX::PTXLdStInstCode::Unsigned;
675 // Create the machine instruction DAG
// Operand 0 of N is the chain, operand 1 the address.
676 SDValue Chain = N->getOperand(0);
677 SDValue N1 = N->getOperand(1);
679 SDValue Offset, Base;
681 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address (Addr filled in by SelectDirectAddr) -> LD_*_avar.
683 if (SelectDirectAddr(N1, Addr)) {
686 Opcode = NVPTX::LD_i8_avar;
689 Opcode = NVPTX::LD_i16_avar;
692 Opcode = NVPTX::LD_i32_avar;
695 Opcode = NVPTX::LD_i64_avar;
698 Opcode = NVPTX::LD_f32_avar;
701 Opcode = NVPTX::LD_f64_avar;
706 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
707 getI32Imm(vecType), getI32Imm(fromType),
708 getI32Imm(fromTypeWidth), Addr, Chain };
709 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: Base + Offset matched by SelectADDRsi (or SelectADDRsi64 when the
// target is 64-bit) -> LD_*_asi.
710 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
711 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
714 Opcode = NVPTX::LD_i8_asi;
717 Opcode = NVPTX::LD_i16_asi;
720 Opcode = NVPTX::LD_i32_asi;
723 Opcode = NVPTX::LD_i64_asi;
726 Opcode = NVPTX::LD_f32_asi;
729 Opcode = NVPTX::LD_f64_asi;
734 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
735 getI32Imm(vecType), getI32Imm(fromType),
736 getI32Imm(fromTypeWidth), Base, Offset, Chain };
737 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: Base + Offset matched by SelectADDRri (or SelectADDRri64) ->
// LD_*_ari_64 or LD_*_ari. The condition choosing between the two opcode
// sets is not visible here (presumably TM.is64Bit() - confirm).
738 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
739 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
743 Opcode = NVPTX::LD_i8_ari_64;
746 Opcode = NVPTX::LD_i16_ari_64;
749 Opcode = NVPTX::LD_i32_ari_64;
752 Opcode = NVPTX::LD_i64_ari_64;
755 Opcode = NVPTX::LD_f32_ari_64;
758 Opcode = NVPTX::LD_f64_ari_64;
766 Opcode = NVPTX::LD_i8_ari;
769 Opcode = NVPTX::LD_i16_ari;
772 Opcode = NVPTX::LD_i32_ari;
775 Opcode = NVPTX::LD_i64_ari;
778 Opcode = NVPTX::LD_f32_ari;
781 Opcode = NVPTX::LD_f64_ari;
787 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
788 getI32Imm(vecType), getI32Imm(fromType),
789 getI32Imm(fromTypeWidth), Base, Offset, Chain };
790 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4: LD_*_areg_64 or LD_*_areg, with N1 itself passed as the address
// operand. The branch header for this form is not visible here.
795 Opcode = NVPTX::LD_i8_areg_64;
798 Opcode = NVPTX::LD_i16_areg_64;
801 Opcode = NVPTX::LD_i32_areg_64;
804 Opcode = NVPTX::LD_i64_areg_64;
807 Opcode = NVPTX::LD_f32_areg_64;
810 Opcode = NVPTX::LD_f64_areg_64;
818 Opcode = NVPTX::LD_i8_areg;
821 Opcode = NVPTX::LD_i16_areg;
824 Opcode = NVPTX::LD_i32_areg;
827 Opcode = NVPTX::LD_i64_areg;
830 Opcode = NVPTX::LD_f32_areg;
833 Opcode = NVPTX::LD_f64_areg;
839 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
840 getI32Imm(vecType), getI32Imm(fromType),
841 getI32Imm(fromTypeWidth), N1, Chain };
842 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Copy N's single memory operand onto the newly created machine node.
846 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
847 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
848 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Selects an NVPTX LDV_* machine node for a LoadV2 / LoadV4 node \p N.
///
/// The new node's operands are, in order: IsVolatile, CodeAddrSpace, VecType,
/// FromType, FromTypeWidth (all as i32 immediates), then the address
/// operand(s), then the chain; its result types are N's own value-type list.
/// The opcode encodes the element type, the vector width (v2 / v4) and the
/// addressing form that matched: _avar (SelectDirectAddr), _asi
/// (SelectADDRsi/si64), _ari / _ari_64 (SelectADDRri/ri64), and
/// _areg / _areg_64 otherwise. N's memory operand is copied onto the new node
/// at the end.
/// NOTE(review): this listing skips some original lines, so the early-exit
/// statements, the per-element-type case labels, several declarations
/// (Opcode, DL, LD, VecType) and the final return are not visible; comments
/// describe only what is shown.
854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// Operand 0 of N is the chain, operand 1 the address.
856 SDValue Chain = N->getOperand(0);
857 SDValue Op1 = N->getOperand(1);
858 SDValue Addr, Offset, Base;
862 MemSDNode *MemSD = cast<MemSDNode>(N);
863 EVT LoadedVT = MemSD->getMemoryVT();
// Only simple (MVT-representable) memory types are handled below.
865 if (!LoadedVT.isSimple())
868 // Address Space Setting
869 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
872 // - .volatile is only available for .global and .shared
873 bool IsVolatile = MemSD->isVolatile();
// True when the address space is none of GLOBAL, SHARED or GENERIC; the
// statement this condition guards is not visible here.
874 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
880 MVT SimpleVT = LoadedVT.getSimpleVT();
882 // Type Setting: fromType + fromTypeWidth
884 // Sign : ISD::SEXTLOAD
885 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
887 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
888 MVT ScalarVT = SimpleVT.getScalarType();
889 // Read at least 8 bits (predicates are stored as 8-bit values)
890 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
891 unsigned int FromType;
892 // The last operand holds the original LoadSDNode::getExtensionType() value
893 unsigned ExtensionType = cast<ConstantSDNode>(
894 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
// Sign-extending loads are Signed, floating-point scalars are Float, and
// everything else is Unsigned.
895 if (ExtensionType == ISD::SEXTLOAD)
896 FromType = NVPTX::PTXLdStInstCode::Signed;
897 else if (ScalarVT.isFloatingPoint())
898 FromType = NVPTX::PTXLdStInstCode::Float;
900 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity follows the node opcode: LoadV2 -> V2, LoadV4 -> V4.
904 switch (N->getOpcode()) {
905 case NVPTXISD::LoadV2:
906 VecType = NVPTX::PTXLdStInstCode::V2;
908 case NVPTXISD::LoadV4:
909 VecType = NVPTX::PTXLdStInstCode::V4;
// The type of N's first result is used as the element type for opcode choice.
915 EVT EltVT = N->getValueType(0);
// Form 1: direct address (Addr filled in by SelectDirectAddr) -> LDV_*_avar.
917 if (SelectDirectAddr(Op1, Addr)) {
918 switch (N->getOpcode()) {
921 case NVPTXISD::LoadV2:
922 switch (EltVT.getSimpleVT().SimpleTy) {
926 Opcode = NVPTX::LDV_i8_v2_avar;
929 Opcode = NVPTX::LDV_i16_v2_avar;
932 Opcode = NVPTX::LDV_i32_v2_avar;
935 Opcode = NVPTX::LDV_i64_v2_avar;
938 Opcode = NVPTX::LDV_f32_v2_avar;
941 Opcode = NVPTX::LDV_f64_v2_avar;
// Only i8, i16, i32 and f32 opcodes appear for the v4 width in this listing.
945 case NVPTXISD::LoadV4:
946 switch (EltVT.getSimpleVT().SimpleTy) {
950 Opcode = NVPTX::LDV_i8_v4_avar;
953 Opcode = NVPTX::LDV_i16_v4_avar;
956 Opcode = NVPTX::LDV_i32_v4_avar;
959 Opcode = NVPTX::LDV_f32_v4_avar;
965 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
966 getI32Imm(VecType), getI32Imm(FromType),
967 getI32Imm(FromTypeWidth), Addr, Chain };
968 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: Base + Offset matched by SelectADDRsi (or SelectADDRsi64 when the
// target is 64-bit) -> LDV_*_asi.
969 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
970 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
971 switch (N->getOpcode()) {
974 case NVPTXISD::LoadV2:
975 switch (EltVT.getSimpleVT().SimpleTy) {
979 Opcode = NVPTX::LDV_i8_v2_asi;
982 Opcode = NVPTX::LDV_i16_v2_asi;
985 Opcode = NVPTX::LDV_i32_v2_asi;
988 Opcode = NVPTX::LDV_i64_v2_asi;
991 Opcode = NVPTX::LDV_f32_v2_asi;
994 Opcode = NVPTX::LDV_f64_v2_asi;
998 case NVPTXISD::LoadV4:
999 switch (EltVT.getSimpleVT().SimpleTy) {
1003 Opcode = NVPTX::LDV_i8_v4_asi;
1006 Opcode = NVPTX::LDV_i16_v4_asi;
1009 Opcode = NVPTX::LDV_i32_v4_asi;
1012 Opcode = NVPTX::LDV_f32_v4_asi;
1018 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1019 getI32Imm(VecType), getI32Imm(FromType),
1020 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1021 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: Base + Offset matched by SelectADDRri (or SelectADDRri64) ->
// LDV_*_ari_64 or LDV_*_ari. The condition choosing between the two opcode
// sets is not visible here (presumably TM.is64Bit() - confirm).
1022 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1023 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1025 switch (N->getOpcode()) {
1028 case NVPTXISD::LoadV2:
1029 switch (EltVT.getSimpleVT().SimpleTy) {
1033 Opcode = NVPTX::LDV_i8_v2_ari_64;
1036 Opcode = NVPTX::LDV_i16_v2_ari_64;
1039 Opcode = NVPTX::LDV_i32_v2_ari_64;
1042 Opcode = NVPTX::LDV_i64_v2_ari_64;
1045 Opcode = NVPTX::LDV_f32_v2_ari_64;
1048 Opcode = NVPTX::LDV_f64_v2_ari_64;
1052 case NVPTXISD::LoadV4:
1053 switch (EltVT.getSimpleVT().SimpleTy) {
1057 Opcode = NVPTX::LDV_i8_v4_ari_64;
1060 Opcode = NVPTX::LDV_i16_v4_ari_64;
1063 Opcode = NVPTX::LDV_i32_v4_ari_64;
1066 Opcode = NVPTX::LDV_f32_v4_ari_64;
1072 switch (N->getOpcode()) {
1075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
1080 Opcode = NVPTX::LDV_i8_v2_ari;
1083 Opcode = NVPTX::LDV_i16_v2_ari;
1086 Opcode = NVPTX::LDV_i32_v2_ari;
1089 Opcode = NVPTX::LDV_i64_v2_ari;
1092 Opcode = NVPTX::LDV_f32_v2_ari;
1095 Opcode = NVPTX::LDV_f64_v2_ari;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
1104 Opcode = NVPTX::LDV_i8_v4_ari;
1107 Opcode = NVPTX::LDV_i16_v4_ari;
1110 Opcode = NVPTX::LDV_i32_v4_ari;
1113 Opcode = NVPTX::LDV_f32_v4_ari;
1120 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1121 getI32Imm(VecType), getI32Imm(FromType),
1122 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1124 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4: LDV_*_areg_64 or LDV_*_areg, with Op1 itself passed as the address
// operand. The branch header for this form is not visible here.
1127 switch (N->getOpcode()) {
1130 case NVPTXISD::LoadV2:
1131 switch (EltVT.getSimpleVT().SimpleTy) {
1135 Opcode = NVPTX::LDV_i8_v2_areg_64;
1138 Opcode = NVPTX::LDV_i16_v2_areg_64;
1141 Opcode = NVPTX::LDV_i32_v2_areg_64;
1144 Opcode = NVPTX::LDV_i64_v2_areg_64;
1147 Opcode = NVPTX::LDV_f32_v2_areg_64;
1150 Opcode = NVPTX::LDV_f64_v2_areg_64;
1154 case NVPTXISD::LoadV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1159 Opcode = NVPTX::LDV_i8_v4_areg_64;
1162 Opcode = NVPTX::LDV_i16_v4_areg_64;
1165 Opcode = NVPTX::LDV_i32_v4_areg_64;
1168 Opcode = NVPTX::LDV_f32_v4_areg_64;
1174 switch (N->getOpcode()) {
1177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
1182 Opcode = NVPTX::LDV_i8_v2_areg;
1185 Opcode = NVPTX::LDV_i16_v2_areg;
1188 Opcode = NVPTX::LDV_i32_v2_areg;
1191 Opcode = NVPTX::LDV_i64_v2_areg;
1194 Opcode = NVPTX::LDV_f32_v2_areg;
1197 Opcode = NVPTX::LDV_f64_v2_areg;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1206 Opcode = NVPTX::LDV_i8_v4_areg;
1209 Opcode = NVPTX::LDV_i16_v4_areg;
1212 Opcode = NVPTX::LDV_i32_v4_areg;
1215 Opcode = NVPTX::LDV_f32_v4_areg;
1222 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1223 getI32Imm(VecType), getI32Imm(FromType),
1224 getI32Imm(FromTypeWidth), Op1, Chain };
1225 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Copy N's single memory operand onto the newly created machine node.
1228 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1229 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1230 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a machine node for an LDG/LDU load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_* / nvvm_ldu_global_* intrinsics, or one of the
/// NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4 nodes. The opcode is picked from the
/// node opcode, the element type of the memory VT, and the addressing form that
/// matches the address operand. The memory operand of the original node is
/// attached to the newly created machine node.
///
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably the new node (LD) is returned -- confirm against the full file.
1235 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1237 SDValue Chain = N->getOperand(0);
1242 // If this is an LDG/LDU intrinsic, the address is the third operand. If it's an
1243 // LDG/LDU SD node (from custom vector handling), then it's the second operand.
1244 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1245 Op1 = N->getOperand(2);
1246 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an intrinsic node holds the intrinsic ID.
1247 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1251 case Intrinsic::nvvm_ldg_global_f:
1252 case Intrinsic::nvvm_ldg_global_i:
1253 case Intrinsic::nvvm_ldg_global_p:
1256 case Intrinsic::nvvm_ldu_global_f:
1257 case Intrinsic::nvvm_ldu_global_i:
1258 case Intrinsic::nvvm_ldu_global_p:
1263 Op1 = N->getOperand(1);
1264 Mem = cast<MemSDNode>(N);
1270 SDValue Base, Offset, Addr;
// Opcode selection below is keyed on the scalar element type: for a vector
// memory VT the element type is used instead of the vector type.
1272 EVT EltVT = Mem->getMemoryVT();
1273 if (EltVT.isVector()) {
1274 EltVT = EltVT.getVectorElementType();
// Direct-address form: the *avar opcode variants, with operands { Addr, Chain }.
1277 if (SelectDirectAddr(Op1, Addr)) {
1278 switch (N->getOpcode()) {
1281 case ISD::INTRINSIC_W_CHAIN:
1283 switch (EltVT.getSimpleVT().SimpleTy) {
1287 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1290 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1293 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1296 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1299 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1302 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1306 switch (EltVT.getSimpleVT().SimpleTy) {
1310 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1313 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1316 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1319 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1322 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1325 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1330 case NVPTXISD::LDGV2:
1331 switch (EltVT.getSimpleVT().SimpleTy) {
1335 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1338 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1341 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1344 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1347 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1350 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1354 case NVPTXISD::LDUV2:
1355 switch (EltVT.getSimpleVT().SimpleTy) {
1359 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1362 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1365 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1368 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1371 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1374 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1378 case NVPTXISD::LDGV4:
1379 switch (EltVT.getSimpleVT().SimpleTy) {
1383 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1386 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1389 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1392 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1396 case NVPTXISD::LDUV4:
1397 switch (EltVT.getSimpleVT().SimpleTy) {
1401 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1404 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1407 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1410 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1416 SDValue Ops[] = { Addr, Chain };
1417 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1418 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1419 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
// Base + offset form matched: *ari64 opcode variants.
// NOTE(review): the condition separating this group from the *ari / *ari32
// group below is not visible here; presumably TM.is64Bit() -- confirm.
1421 switch (N->getOpcode()) {
1424 case ISD::INTRINSIC_W_CHAIN:
1426 switch (EltVT.getSimpleVT().SimpleTy) {
1430 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1433 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1436 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1439 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1442 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1445 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1449 switch (EltVT.getSimpleVT().SimpleTy) {
1453 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1456 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1459 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1462 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1465 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1468 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1473 case NVPTXISD::LDGV2:
1474 switch (EltVT.getSimpleVT().SimpleTy) {
1478 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1481 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1484 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1487 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1490 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1493 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1497 case NVPTXISD::LDUV2:
1498 switch (EltVT.getSimpleVT().SimpleTy) {
1502 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1505 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1508 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1511 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1514 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1517 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1521 case NVPTXISD::LDGV4:
1522 switch (EltVT.getSimpleVT().SimpleTy) {
1526 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1529 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1532 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1535 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1539 case NVPTXISD::LDUV4:
1540 switch (EltVT.getSimpleVT().SimpleTy) {
1544 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1547 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1550 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1553 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
// Base + offset form: *ari (scalar) / *ari32 (vector) opcode variants.
1559 switch (N->getOpcode()) {
1562 case ISD::INTRINSIC_W_CHAIN:
1564 switch (EltVT.getSimpleVT().SimpleTy) {
1568 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1571 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1574 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1577 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1580 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1583 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1587 switch (EltVT.getSimpleVT().SimpleTy) {
1591 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1594 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1597 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1600 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1603 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1606 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1611 case NVPTXISD::LDGV2:
1612 switch (EltVT.getSimpleVT().SimpleTy) {
1616 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1619 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1622 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1625 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1628 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1631 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1635 case NVPTXISD::LDUV2:
1636 switch (EltVT.getSimpleVT().SimpleTy) {
1640 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1643 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1646 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1649 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1652 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1655 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1659 case NVPTXISD::LDGV4:
1660 switch (EltVT.getSimpleVT().SimpleTy) {
1664 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1667 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1670 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1673 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1677 case NVPTXISD::LDUV4:
1678 switch (EltVT.getSimpleVT().SimpleTy) {
1682 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1685 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1688 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1691 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1698 SDValue Ops[] = { Base, Offset, Chain };
1700 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Remaining case: the address operand (Op1) is passed through as-is.
// This group uses the *areg64 opcode variants.
1703 switch (N->getOpcode()) {
1706 case ISD::INTRINSIC_W_CHAIN:
1708 switch (EltVT.getSimpleVT().SimpleTy) {
1712 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1715 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1718 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1721 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1724 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1727 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1731 switch (EltVT.getSimpleVT().SimpleTy) {
1735 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1738 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1741 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1744 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1747 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1750 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1755 case NVPTXISD::LDGV2:
1756 switch (EltVT.getSimpleVT().SimpleTy) {
1760 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1763 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1766 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1769 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1772 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1775 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1779 case NVPTXISD::LDUV2:
1780 switch (EltVT.getSimpleVT().SimpleTy) {
1784 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1787 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1790 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1793 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1796 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1799 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1803 case NVPTXISD::LDGV4:
1804 switch (EltVT.getSimpleVT().SimpleTy) {
1808 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1811 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1814 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1817 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1821 case NVPTXISD::LDUV4:
1822 switch (EltVT.getSimpleVT().SimpleTy) {
1826 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1829 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1832 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1835 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
// Address operand passed through as-is: *areg (scalar) / *areg32 (vector)
// opcode variants.
1841 switch (N->getOpcode()) {
1844 case ISD::INTRINSIC_W_CHAIN:
1846 switch (EltVT.getSimpleVT().SimpleTy) {
1850 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1853 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1856 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1859 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1862 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1869 switch (EltVT.getSimpleVT().SimpleTy) {
1873 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1876 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1879 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1882 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1885 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1893 case NVPTXISD::LDGV2:
1894 switch (EltVT.getSimpleVT().SimpleTy) {
1898 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1901 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1904 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1907 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1910 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1913 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1917 case NVPTXISD::LDUV2:
1918 switch (EltVT.getSimpleVT().SimpleTy) {
1922 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1925 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1928 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1931 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1934 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1937 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1941 case NVPTXISD::LDGV4:
1942 switch (EltVT.getSimpleVT().SimpleTy) {
1946 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1949 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1952 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1955 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1959 case NVPTXISD::LDUV4:
1960 switch (EltVT.getSimpleVT().SimpleTy) {
1964 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1967 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1970 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1973 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1980 SDValue Ops[] = { Op1, Chain };
1981 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the selected machine node.
1984 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1985 MemRefs0[0] = Mem->getMemOperand();
1986 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::ST_* machine node for a StoreSDNode.
///
/// The volatile flag, address-space code, vector kind, destination type and
/// destination type width are passed as getI32Imm operands after the stored
/// value. The opcode variant depends on which addressing form matches the
/// address operand: direct address (*_avar), SelectADDRsi (*_asi),
/// SelectADDRri (*_ari / *_ari_64), or the address operand used as-is
/// (*_areg / *_areg_64). The memory operand of \p N is attached to the new
/// node.
///
/// NOTE(review): the return statements (including the bail-outs for indexed
/// and non-simple stores) are not visible in this excerpt -- confirm against
/// the full file.
1991 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1993 StoreSDNode *ST = cast<StoreSDNode>(N);
1994 EVT StoreVT = ST->getMemoryVT();
1995 SDNode *NVPTXST = nullptr;
1997 // Indexed (pre/post increment/decrement) stores are not supported.
1998 if (ST->isIndexed())
// Only simple memory value types are handled here.
2001 if (!StoreVT.isSimple())
2004 // Address Space Setting
2005 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2008 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below also lets GENERIC through, which the
// comment above does not mention.
2009 bool isVolatile = ST->isVolatile();
2010 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2011 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2012 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector Setting: defaults to Scalar; V2 / V4 are chosen from the element
// count of a vector memory type.
2016 MVT SimpleVT = StoreVT.getSimpleVT();
2017 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2018 if (SimpleVT.isVector()) {
2019 unsigned num = SimpleVT.getVectorNumElements();
2021 vecType = NVPTX::PTXLdStInstCode::V2;
2023 vecType = NVPTX::PTXLdStInstCode::V4;
2028 // Type Setting: toType + toTypeWidth
2029 // - for integer type, always use 'u'
2031 MVT ScalarVT = SimpleVT.getScalarType();
2032 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2033 unsigned int toType;
2034 if (ScalarVT.isFloatingPoint())
2035 toType = NVPTX::PTXLdStInstCode::Float;
2037 toType = NVPTX::PTXLdStInstCode::Unsigned;
2039 // Create the machine instruction DAG
// Operand layout of the store node: 0 = chain, 1 = stored value, 2 = address.
2040 SDValue Chain = N->getOperand(0);
2041 SDValue N1 = N->getOperand(1);
2042 SDValue N2 = N->getOperand(2);
2044 SDValue Offset, Base;
// Simple type of the stored value; presumably the key of the opcode switches
// below (the switch headers are not visible here -- confirm).
2046 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Direct-address form: ST_*_avar.
2048 if (SelectDirectAddr(N2, Addr)) {
2051 Opcode = NVPTX::ST_i8_avar;
2054 Opcode = NVPTX::ST_i16_avar;
2057 Opcode = NVPTX::ST_i32_avar;
2060 Opcode = NVPTX::ST_i64_avar;
2063 Opcode = NVPTX::ST_f32_avar;
2066 Opcode = NVPTX::ST_f64_avar;
2071 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2072 getI32Imm(vecType), getI32Imm(toType),
2073 getI32Imm(toTypeWidth), Addr, Chain };
2074 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2075 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2076 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
// Address matched by SelectADDRsi / SelectADDRsi64: ST_*_asi.
2079 Opcode = NVPTX::ST_i8_asi;
2082 Opcode = NVPTX::ST_i16_asi;
2085 Opcode = NVPTX::ST_i32_asi;
2088 Opcode = NVPTX::ST_i64_asi;
2091 Opcode = NVPTX::ST_f32_asi;
2094 Opcode = NVPTX::ST_f64_asi;
2099 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2100 getI32Imm(vecType), getI32Imm(toType),
2101 getI32Imm(toTypeWidth), Base, Offset, Chain };
2102 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2103 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2104 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
// Address matched by SelectADDRri / SelectADDRri64: ST_*_ari_64 first, then
// ST_*_ari. NOTE(review): the condition choosing between the two groups is
// not visible here; presumably TM.is64Bit() -- confirm.
2108 Opcode = NVPTX::ST_i8_ari_64;
2111 Opcode = NVPTX::ST_i16_ari_64;
2114 Opcode = NVPTX::ST_i32_ari_64;
2117 Opcode = NVPTX::ST_i64_ari_64;
2120 Opcode = NVPTX::ST_f32_ari_64;
2123 Opcode = NVPTX::ST_f64_ari_64;
2131 Opcode = NVPTX::ST_i8_ari;
2134 Opcode = NVPTX::ST_i16_ari;
2137 Opcode = NVPTX::ST_i32_ari;
2140 Opcode = NVPTX::ST_i64_ari;
2143 Opcode = NVPTX::ST_f32_ari;
2146 Opcode = NVPTX::ST_f64_ari;
2152 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2153 getI32Imm(vecType), getI32Imm(toType),
2154 getI32Imm(toTypeWidth), Base, Offset, Chain };
2155 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Remaining case: the address operand N2 is passed through as-is
// (ST_*_areg_64 first, then ST_*_areg).
2160 Opcode = NVPTX::ST_i8_areg_64;
2163 Opcode = NVPTX::ST_i16_areg_64;
2166 Opcode = NVPTX::ST_i32_areg_64;
2169 Opcode = NVPTX::ST_i64_areg_64;
2172 Opcode = NVPTX::ST_f32_areg_64;
2175 Opcode = NVPTX::ST_f64_areg_64;
2183 Opcode = NVPTX::ST_i8_areg;
2186 Opcode = NVPTX::ST_i16_areg;
2189 Opcode = NVPTX::ST_i32_areg;
2192 Opcode = NVPTX::ST_i64_areg;
2195 Opcode = NVPTX::ST_f32_areg;
2198 Opcode = NVPTX::ST_f64_areg;
2204 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2205 getI32Imm(vecType), getI32Imm(toType),
2206 getI32Imm(toTypeWidth), N2, Chain };
2207 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the original store's memory operand over to the selected machine node.
2211 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2212 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2213 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::STV_* machine node for an NVPTXISD::StoreV2 or
/// NVPTXISD::StoreV4 node.
///
/// The operand list is built in this order: the stored values (2 or 4), the
/// getI32Imm operands for volatile flag, address-space code, vector kind,
/// destination type and destination type width, the address operand(s) for the
/// matched addressing form, and finally the chain. Stores whose address space
/// code is CONSTANT are rejected via report_fatal_error. The memory operand of
/// \p N is attached to the new node.
///
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably the new node (ST) is returned -- confirm against the full file.
2219 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2220 SDValue Chain = N->getOperand(0);
2221 SDValue Op1 = N->getOperand(1);
2222 SDValue Addr, Offset, Base;
// The opcode switches below are keyed on the value type of the first stored
// operand.
2226 EVT EltVT = Op1.getValueType();
2227 MemSDNode *MemSD = cast<MemSDNode>(N);
2228 EVT StoreVT = MemSD->getMemoryVT();
2230 // Address Space Setting
2231 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2233 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2234 report_fatal_error("Cannot store to pointer that points to constant "
2239 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below also lets GENERIC through, which the
// comment above does not mention.
2240 bool IsVolatile = MemSD->isVolatile();
2241 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2242 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2243 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2246 // Type Setting: toType + toTypeWidth
2247 // - for integer type, always use 'u'
2248 assert(StoreVT.isSimple() && "Store value is not simple");
2249 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2250 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2252 if (ScalarVT.isFloatingPoint())
2253 ToType = NVPTX::PTXLdStInstCode::Float;
2255 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2257 SmallVector<SDValue, 12> StOps;
// Collect the stored values first (operands 1-2 for StoreV2, 1-4 for
// StoreV4); the operand right after them is the address (N2).
2261 switch (N->getOpcode()) {
2262 case NVPTXISD::StoreV2:
2263 VecType = NVPTX::PTXLdStInstCode::V2;
2264 StOps.push_back(N->getOperand(1));
2265 StOps.push_back(N->getOperand(2));
2266 N2 = N->getOperand(3);
2268 case NVPTXISD::StoreV4:
2269 VecType = NVPTX::PTXLdStInstCode::V4;
2270 StOps.push_back(N->getOperand(1));
2271 StOps.push_back(N->getOperand(2));
2272 StOps.push_back(N->getOperand(3));
2273 StOps.push_back(N->getOperand(4));
2274 N2 = N->getOperand(5);
// Instruction-code immediates follow the stored values.
2280 StOps.push_back(getI32Imm(IsVolatile));
2281 StOps.push_back(getI32Imm(CodeAddrSpace));
2282 StOps.push_back(getI32Imm(VecType));
2283 StOps.push_back(getI32Imm(ToType));
2284 StOps.push_back(getI32Imm(ToTypeWidth));
// Direct-address form: STV_*_avar, address operand is Addr.
2286 if (SelectDirectAddr(N2, Addr)) {
2287 switch (N->getOpcode()) {
2290 case NVPTXISD::StoreV2:
2291 switch (EltVT.getSimpleVT().SimpleTy) {
2295 Opcode = NVPTX::STV_i8_v2_avar;
2298 Opcode = NVPTX::STV_i16_v2_avar;
2301 Opcode = NVPTX::STV_i32_v2_avar;
2304 Opcode = NVPTX::STV_i64_v2_avar;
2307 Opcode = NVPTX::STV_f32_v2_avar;
2310 Opcode = NVPTX::STV_f64_v2_avar;
2314 case NVPTXISD::StoreV4:
2315 switch (EltVT.getSimpleVT().SimpleTy) {
2319 Opcode = NVPTX::STV_i8_v4_avar;
2322 Opcode = NVPTX::STV_i16_v4_avar;
2325 Opcode = NVPTX::STV_i32_v4_avar;
2328 Opcode = NVPTX::STV_f32_v4_avar;
2333 StOps.push_back(Addr);
2334 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2335 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
// Address matched by SelectADDRsi / SelectADDRsi64: STV_*_asi, address
// operands are Base and Offset.
2336 switch (N->getOpcode()) {
2339 case NVPTXISD::StoreV2:
2340 switch (EltVT.getSimpleVT().SimpleTy) {
2344 Opcode = NVPTX::STV_i8_v2_asi;
2347 Opcode = NVPTX::STV_i16_v2_asi;
2350 Opcode = NVPTX::STV_i32_v2_asi;
2353 Opcode = NVPTX::STV_i64_v2_asi;
2356 Opcode = NVPTX::STV_f32_v2_asi;
2359 Opcode = NVPTX::STV_f64_v2_asi;
2363 case NVPTXISD::StoreV4:
2364 switch (EltVT.getSimpleVT().SimpleTy) {
2368 Opcode = NVPTX::STV_i8_v4_asi;
2371 Opcode = NVPTX::STV_i16_v4_asi;
2374 Opcode = NVPTX::STV_i32_v4_asi;
2377 Opcode = NVPTX::STV_f32_v4_asi;
2382 StOps.push_back(Base);
2383 StOps.push_back(Offset);
2384 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2385 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
// Address matched by SelectADDRri / SelectADDRri64: STV_*_ari_64 variants.
// NOTE(review): the condition separating this group from the STV_*_ari group
// below is not visible here; presumably TM.is64Bit() -- confirm.
2387 switch (N->getOpcode()) {
2390 case NVPTXISD::StoreV2:
2391 switch (EltVT.getSimpleVT().SimpleTy) {
2395 Opcode = NVPTX::STV_i8_v2_ari_64;
2398 Opcode = NVPTX::STV_i16_v2_ari_64;
2401 Opcode = NVPTX::STV_i32_v2_ari_64;
2404 Opcode = NVPTX::STV_i64_v2_ari_64;
2407 Opcode = NVPTX::STV_f32_v2_ari_64;
2410 Opcode = NVPTX::STV_f64_v2_ari_64;
2414 case NVPTXISD::StoreV4:
2415 switch (EltVT.getSimpleVT().SimpleTy) {
2419 Opcode = NVPTX::STV_i8_v4_ari_64;
2422 Opcode = NVPTX::STV_i16_v4_ari_64;
2425 Opcode = NVPTX::STV_i32_v4_ari_64;
2428 Opcode = NVPTX::STV_f32_v4_ari_64;
// STV_*_ari variants of the same addressing form.
2434 switch (N->getOpcode()) {
2437 case NVPTXISD::StoreV2:
2438 switch (EltVT.getSimpleVT().SimpleTy) {
2442 Opcode = NVPTX::STV_i8_v2_ari;
2445 Opcode = NVPTX::STV_i16_v2_ari;
2448 Opcode = NVPTX::STV_i32_v2_ari;
2451 Opcode = NVPTX::STV_i64_v2_ari;
2454 Opcode = NVPTX::STV_f32_v2_ari;
2457 Opcode = NVPTX::STV_f64_v2_ari;
2461 case NVPTXISD::StoreV4:
2462 switch (EltVT.getSimpleVT().SimpleTy) {
2466 Opcode = NVPTX::STV_i8_v4_ari;
2469 Opcode = NVPTX::STV_i16_v4_ari;
2472 Opcode = NVPTX::STV_i32_v4_ari;
2475 Opcode = NVPTX::STV_f32_v4_ari;
2481 StOps.push_back(Base);
2482 StOps.push_back(Offset);
// Remaining case: the address operand N2 is passed through as-is
// (STV_*_areg_64 variants first, then STV_*_areg).
2485 switch (N->getOpcode()) {
2488 case NVPTXISD::StoreV2:
2489 switch (EltVT.getSimpleVT().SimpleTy) {
2493 Opcode = NVPTX::STV_i8_v2_areg_64;
2496 Opcode = NVPTX::STV_i16_v2_areg_64;
2499 Opcode = NVPTX::STV_i32_v2_areg_64;
2502 Opcode = NVPTX::STV_i64_v2_areg_64;
2505 Opcode = NVPTX::STV_f32_v2_areg_64;
2508 Opcode = NVPTX::STV_f64_v2_areg_64;
2512 case NVPTXISD::StoreV4:
2513 switch (EltVT.getSimpleVT().SimpleTy) {
2517 Opcode = NVPTX::STV_i8_v4_areg_64;
2520 Opcode = NVPTX::STV_i16_v4_areg_64;
2523 Opcode = NVPTX::STV_i32_v4_areg_64;
2526 Opcode = NVPTX::STV_f32_v4_areg_64;
2532 switch (N->getOpcode()) {
2535 case NVPTXISD::StoreV2:
2536 switch (EltVT.getSimpleVT().SimpleTy) {
2540 Opcode = NVPTX::STV_i8_v2_areg;
2543 Opcode = NVPTX::STV_i16_v2_areg;
2546 Opcode = NVPTX::STV_i32_v2_areg;
2549 Opcode = NVPTX::STV_i64_v2_areg;
2552 Opcode = NVPTX::STV_f32_v2_areg;
2555 Opcode = NVPTX::STV_f64_v2_areg;
2559 case NVPTXISD::StoreV4:
2560 switch (EltVT.getSimpleVT().SimpleTy) {
2564 Opcode = NVPTX::STV_i8_v4_areg;
2567 Opcode = NVPTX::STV_i16_v4_areg;
2570 Opcode = NVPTX::STV_i32_v4_areg;
2573 Opcode = NVPTX::STV_f32_v4_areg;
2579 StOps.push_back(N2);
// The chain is appended last, after the address operand(s).
2582 StOps.push_back(Chain);
2584 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the original node's memory operand over to the selected machine node.
2586 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2587 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2588 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::LoadParamMem* machine node for an NVPTXISD::LoadParam,
/// LoadParamV2 or LoadParamV4 node.
///
/// The opcode is chosen from the simple type of the node's memory VT. The
/// result list holds one, two or four values of the node's first result type,
/// followed by MVT::Other (chain) and MVT::Glue. The operands of the new node
/// are the constant offset (as an i32 target constant), the chain and the
/// incoming flag.
///
/// NOTE(review): the return statements are not visible in this excerpt --
/// confirm against the full file.
2593 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Operand layout used here: 0 = chain, 2 = offset, 3 = flag.
2594 SDValue Chain = Node->getOperand(0);
2595 SDValue Offset = Node->getOperand(2);
2596 SDValue Flag = Node->getOperand(3);
2598 MemSDNode *Mem = cast<MemSDNode>(Node);
2601 switch (Node->getOpcode()) {
2604 case NVPTXISD::LoadParam:
2607 case NVPTXISD::LoadParamV2:
2610 case NVPTXISD::LoadParamV4:
2615 EVT EltVT = Node->getValueType(0);
2616 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes. Two consecutive cases map to the I8 opcode; the case labels
// are not visible here (presumably i1 and i8 -- confirm).
2624 switch (MemVT.getSimpleVT().SimpleTy) {
2628 Opc = NVPTX::LoadParamMemI8;
2631 Opc = NVPTX::LoadParamMemI8;
2634 Opc = NVPTX::LoadParamMemI16;
2637 Opc = NVPTX::LoadParamMemI32;
2640 Opc = NVPTX::LoadParamMemI64;
2643 Opc = NVPTX::LoadParamMemF32;
2646 Opc = NVPTX::LoadParamMemF64;
// Two-element vector opcodes.
2651 switch (MemVT.getSimpleVT().SimpleTy) {
2655 Opc = NVPTX::LoadParamMemV2I8;
2658 Opc = NVPTX::LoadParamMemV2I8;
2661 Opc = NVPTX::LoadParamMemV2I16;
2664 Opc = NVPTX::LoadParamMemV2I32;
2667 Opc = NVPTX::LoadParamMemV2I64;
2670 Opc = NVPTX::LoadParamMemV2F32;
2673 Opc = NVPTX::LoadParamMemV2F64;
// Four-element vector opcodes (no I64 / F64 variants are listed).
2678 switch (MemVT.getSimpleVT().SimpleTy) {
2682 Opc = NVPTX::LoadParamMemV4I8;
2685 Opc = NVPTX::LoadParamMemV4I8;
2688 Opc = NVPTX::LoadParamMemV4I16;
2691 Opc = NVPTX::LoadParamMemV4I32;
2694 Opc = NVPTX::LoadParamMemV4F32;
// Result list: the loaded element value(s), then chain and glue.
2702 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2703 } else if (VecSize == 2) {
2704 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2706 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2707 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a ConstantSDNode; it is re-emitted as an i32
// target constant.
2710 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
// NOTE(review): three operands are pushed into a SmallVector declared with an
// inline capacity of 2.
2712 SmallVector<SDValue, 2> Ops;
2713 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2714 Ops.push_back(Chain);
2715 Ops.push_back(Flag);
2718 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// Select an NVPTX::StoreRetval* machine node for an NVPTXISD::StoreRetval,
/// StoreRetvalV2 or StoreRetvalV4 node.
///
/// The operands of the new node are the stored value(s) (node operands
/// starting at index 2), the constant offset as an i32 target constant, and
/// the chain. The opcode is chosen from the simple type of the node's memory
/// VT. The memory operand of \p N is attached to the new node.
///
/// NOTE(review): the return statements are not visible in this excerpt --
/// confirm against the full file.
2722 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Operand layout used here: 0 = chain, 1 = constant offset, 2.. = values.
2724 SDValue Chain = N->getOperand(0);
2725 SDValue Offset = N->getOperand(1);
2726 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2727 MemSDNode *Mem = cast<MemSDNode>(N);
2729 // How many elements do we have?
2730 unsigned NumElts = 1;
2731 switch (N->getOpcode()) {
2734 case NVPTXISD::StoreRetval:
2737 case NVPTXISD::StoreRetvalV2:
2740 case NVPTXISD::StoreRetvalV4:
2745 // Build vector of operands
2746 SmallVector<SDValue, 6> Ops;
2747 for (unsigned i = 0; i < NumElts; ++i)
2748 Ops.push_back(N->getOperand(i + 2));
2749 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2750 Ops.push_back(Chain);
2752 // Determine target opcode
2753 // If we have an i1, use an 8-bit store. The lowering code in
2754 // NVPTXISelLowering will have already emitted an upcast.
2755 unsigned Opcode = 0;
// Scalar opcodes (two consecutive cases map to the I8 opcode, matching the
// i1 note above).
2760 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2764 Opcode = NVPTX::StoreRetvalI8;
2767 Opcode = NVPTX::StoreRetvalI8;
2770 Opcode = NVPTX::StoreRetvalI16;
2773 Opcode = NVPTX::StoreRetvalI32;
2776 Opcode = NVPTX::StoreRetvalI64;
2779 Opcode = NVPTX::StoreRetvalF32;
2782 Opcode = NVPTX::StoreRetvalF64;
// Two-element vector opcodes.
2787 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2791 Opcode = NVPTX::StoreRetvalV2I8;
2794 Opcode = NVPTX::StoreRetvalV2I8;
2797 Opcode = NVPTX::StoreRetvalV2I16;
2800 Opcode = NVPTX::StoreRetvalV2I32;
2803 Opcode = NVPTX::StoreRetvalV2I64;
2806 Opcode = NVPTX::StoreRetvalV2F32;
2809 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element vector opcodes (no I64 / F64 variants are listed).
2814 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2818 Opcode = NVPTX::StoreRetvalV4I8;
2821 Opcode = NVPTX::StoreRetvalV4I8;
2824 Opcode = NVPTX::StoreRetvalV4I16;
2827 Opcode = NVPTX::StoreRetvalV4I32;
2830 Opcode = NVPTX::StoreRetvalV4F32;
2837 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the original node's memory operand over to the selected machine node.
2838 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2839 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2840 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select an NVPTX::StoreParam* machine node for an NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// The operands of the new node are the stored value(s) (node operands
/// starting at index 3), the parameter number and offset as i32 target
/// constants, the chain, and the incoming flag (the node's last operand). For
/// StoreParamU32 / StoreParamS32 a CVT_u32_u16 / CVT_s32_s16 node is created
/// on the value first and the store uses StoreParamI32. The new node produces
/// MVT::Other and MVT::Glue, and the memory operand of \p N is attached to it.
///
/// NOTE(review): the return statements are not visible in this excerpt --
/// confirm against the full file.
2845 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Operand layout used here: 0 = chain, 1 = constant parameter number,
// 2 = constant offset, 3.. = values, last = flag.
2847 SDValue Chain = N->getOperand(0);
2848 SDValue Param = N->getOperand(1);
2849 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2850 SDValue Offset = N->getOperand(2);
2851 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2852 MemSDNode *Mem = cast<MemSDNode>(N);
2853 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2855 // How many elements do we have?
2856 unsigned NumElts = 1;
2857 switch (N->getOpcode()) {
2860 case NVPTXISD::StoreParamU32:
2861 case NVPTXISD::StoreParamS32:
2862 case NVPTXISD::StoreParam:
2865 case NVPTXISD::StoreParamV2:
2868 case NVPTXISD::StoreParamV4:
2873 // Build vector of operands
2874 SmallVector<SDValue, 8> Ops;
2875 for (unsigned i = 0; i < NumElts; ++i)
2876 Ops.push_back(N->getOperand(i + 3));
2877 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2878 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2879 Ops.push_back(Chain);
2880 Ops.push_back(Flag);
2882 // Determine target opcode
2883 // If we have an i1, use an 8-bit store. The lowering code in
2884 // NVPTXISelLowering will have already emitted an upcast.
2885 unsigned Opcode = 0;
2886 switch (N->getOpcode()) {
// Scalar opcodes (two consecutive cases map to the I8 opcode, matching the
// i1 note above).
2892 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2896 Opcode = NVPTX::StoreParamI8;
2899 Opcode = NVPTX::StoreParamI8;
2902 Opcode = NVPTX::StoreParamI16;
2905 Opcode = NVPTX::StoreParamI32;
2908 Opcode = NVPTX::StoreParamI64;
2911 Opcode = NVPTX::StoreParamF32;
2914 Opcode = NVPTX::StoreParamF64;
// Two-element vector opcodes.
2919 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2923 Opcode = NVPTX::StoreParamV2I8;
2926 Opcode = NVPTX::StoreParamV2I8;
2929 Opcode = NVPTX::StoreParamV2I16;
2932 Opcode = NVPTX::StoreParamV2I32;
2935 Opcode = NVPTX::StoreParamV2I64;
2938 Opcode = NVPTX::StoreParamV2F32;
2941 Opcode = NVPTX::StoreParamV2F64;
// Four-element vector opcodes (no I64 / F64 variants are listed).
2946 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2950 Opcode = NVPTX::StoreParamV4I8;
2953 Opcode = NVPTX::StoreParamV4I8;
2956 Opcode = NVPTX::StoreParamV4I16;
2959 Opcode = NVPTX::StoreParamV4I32;
2962 Opcode = NVPTX::StoreParamV4F32;
2968 // Special case: if we have a sign-extend/zero-extend node, insert the
2969 // conversion instruction first, and use that as the value operand to
2970 // the selected StoreParam node.
2971 case NVPTXISD::StoreParamU32: {
2972 Opcode = NVPTX::StoreParamI32;
2973 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2975 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2976 MVT::i32, Ops[0], CvtNone);
2977 Ops[0] = SDValue(Cvt, 0);
2980 case NVPTXISD::StoreParamS32: {
2981 Opcode = NVPTX::StoreParamI32;
2982 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2984 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2985 MVT::i32, Ops[0], CvtNone);
2986 Ops[0] = SDValue(Cvt, 0);
// The selected store produces a chain and a glue result.
2991 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2993 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the original node's memory operand over to the selected machine node.
2994 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2995 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2996 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3001 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3002 SDValue Chain = N->getOperand(0);
3003 SDNode *Ret = nullptr;
3005 SmallVector<SDValue, 8> Ops;
3007 switch (N->getOpcode()) {
3008 default: return nullptr;
3009 case NVPTXISD::Tex1DFloatS32:
3010 Opc = NVPTX::TEX_1D_F32_S32;
3012 case NVPTXISD::Tex1DFloatFloat:
3013 Opc = NVPTX::TEX_1D_F32_F32;
3015 case NVPTXISD::Tex1DFloatFloatLevel:
3016 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3018 case NVPTXISD::Tex1DFloatFloatGrad:
3019 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3021 case NVPTXISD::Tex1DS32S32:
3022 Opc = NVPTX::TEX_1D_S32_S32;
3024 case NVPTXISD::Tex1DS32Float:
3025 Opc = NVPTX::TEX_1D_S32_F32;
3027 case NVPTXISD::Tex1DS32FloatLevel:
3028 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3030 case NVPTXISD::Tex1DS32FloatGrad:
3031 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3033 case NVPTXISD::Tex1DU32S32:
3034 Opc = NVPTX::TEX_1D_U32_S32;
3036 case NVPTXISD::Tex1DU32Float:
3037 Opc = NVPTX::TEX_1D_U32_F32;
3039 case NVPTXISD::Tex1DU32FloatLevel:
3040 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3042 case NVPTXISD::Tex1DU32FloatGrad:
3043 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3045 case NVPTXISD::Tex1DArrayFloatS32:
3046 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3048 case NVPTXISD::Tex1DArrayFloatFloat:
3049 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3051 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3052 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3054 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3055 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3057 case NVPTXISD::Tex1DArrayS32S32:
3058 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3060 case NVPTXISD::Tex1DArrayS32Float:
3061 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3063 case NVPTXISD::Tex1DArrayS32FloatLevel:
3064 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3066 case NVPTXISD::Tex1DArrayS32FloatGrad:
3067 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3069 case NVPTXISD::Tex1DArrayU32S32:
3070 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3072 case NVPTXISD::Tex1DArrayU32Float:
3073 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3075 case NVPTXISD::Tex1DArrayU32FloatLevel:
3076 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3078 case NVPTXISD::Tex1DArrayU32FloatGrad:
3079 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3081 case NVPTXISD::Tex2DFloatS32:
3082 Opc = NVPTX::TEX_2D_F32_S32;
3084 case NVPTXISD::Tex2DFloatFloat:
3085 Opc = NVPTX::TEX_2D_F32_F32;
3087 case NVPTXISD::Tex2DFloatFloatLevel:
3088 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3090 case NVPTXISD::Tex2DFloatFloatGrad:
3091 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3093 case NVPTXISD::Tex2DS32S32:
3094 Opc = NVPTX::TEX_2D_S32_S32;
3096 case NVPTXISD::Tex2DS32Float:
3097 Opc = NVPTX::TEX_2D_S32_F32;
3099 case NVPTXISD::Tex2DS32FloatLevel:
3100 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3102 case NVPTXISD::Tex2DS32FloatGrad:
3103 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3105 case NVPTXISD::Tex2DU32S32:
3106 Opc = NVPTX::TEX_2D_U32_S32;
3108 case NVPTXISD::Tex2DU32Float:
3109 Opc = NVPTX::TEX_2D_U32_F32;
3111 case NVPTXISD::Tex2DU32FloatLevel:
3112 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3114 case NVPTXISD::Tex2DU32FloatGrad:
3115 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3117 case NVPTXISD::Tex2DArrayFloatS32:
3118 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3120 case NVPTXISD::Tex2DArrayFloatFloat:
3121 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3123 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3124 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3126 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3127 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3129 case NVPTXISD::Tex2DArrayS32S32:
3130 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3132 case NVPTXISD::Tex2DArrayS32Float:
3133 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3135 case NVPTXISD::Tex2DArrayS32FloatLevel:
3136 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3138 case NVPTXISD::Tex2DArrayS32FloatGrad:
3139 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3141 case NVPTXISD::Tex2DArrayU32S32:
3142 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3144 case NVPTXISD::Tex2DArrayU32Float:
3145 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3147 case NVPTXISD::Tex2DArrayU32FloatLevel:
3148 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3150 case NVPTXISD::Tex2DArrayU32FloatGrad:
3151 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3153 case NVPTXISD::Tex3DFloatS32:
3154 Opc = NVPTX::TEX_3D_F32_S32;
3156 case NVPTXISD::Tex3DFloatFloat:
3157 Opc = NVPTX::TEX_3D_F32_F32;
3159 case NVPTXISD::Tex3DFloatFloatLevel:
3160 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3162 case NVPTXISD::Tex3DFloatFloatGrad:
3163 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3165 case NVPTXISD::Tex3DS32S32:
3166 Opc = NVPTX::TEX_3D_S32_S32;
3168 case NVPTXISD::Tex3DS32Float:
3169 Opc = NVPTX::TEX_3D_S32_F32;
3171 case NVPTXISD::Tex3DS32FloatLevel:
3172 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3174 case NVPTXISD::Tex3DS32FloatGrad:
3175 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3177 case NVPTXISD::Tex3DU32S32:
3178 Opc = NVPTX::TEX_3D_U32_S32;
3180 case NVPTXISD::Tex3DU32Float:
3181 Opc = NVPTX::TEX_3D_U32_F32;
3183 case NVPTXISD::Tex3DU32FloatLevel:
3184 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3186 case NVPTXISD::Tex3DU32FloatGrad:
3187 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3189 case NVPTXISD::TexCubeFloatFloat:
3190 Opc = NVPTX::TEX_CUBE_F32_F32;
3192 case NVPTXISD::TexCubeFloatFloatLevel:
3193 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3195 case NVPTXISD::TexCubeS32Float:
3196 Opc = NVPTX::TEX_CUBE_S32_F32;
3198 case NVPTXISD::TexCubeS32FloatLevel:
3199 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3201 case NVPTXISD::TexCubeU32Float:
3202 Opc = NVPTX::TEX_CUBE_U32_F32;
3204 case NVPTXISD::TexCubeU32FloatLevel:
3205 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3207 case NVPTXISD::TexCubeArrayFloatFloat:
3208 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3210 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3211 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3213 case NVPTXISD::TexCubeArrayS32Float:
3214 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3216 case NVPTXISD::TexCubeArrayS32FloatLevel:
3217 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3219 case NVPTXISD::TexCubeArrayU32Float:
3220 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3222 case NVPTXISD::TexCubeArrayU32FloatLevel:
3223 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3225 case NVPTXISD::Tld4R2DFloatFloat:
3226 Opc = NVPTX::TLD4_R_2D_F32_F32;
3228 case NVPTXISD::Tld4G2DFloatFloat:
3229 Opc = NVPTX::TLD4_G_2D_F32_F32;
3231 case NVPTXISD::Tld4B2DFloatFloat:
3232 Opc = NVPTX::TLD4_B_2D_F32_F32;
3234 case NVPTXISD::Tld4A2DFloatFloat:
3235 Opc = NVPTX::TLD4_A_2D_F32_F32;
3237 case NVPTXISD::Tld4R2DS64Float:
3238 Opc = NVPTX::TLD4_R_2D_S32_F32;
3240 case NVPTXISD::Tld4G2DS64Float:
3241 Opc = NVPTX::TLD4_G_2D_S32_F32;
3243 case NVPTXISD::Tld4B2DS64Float:
3244 Opc = NVPTX::TLD4_B_2D_S32_F32;
3246 case NVPTXISD::Tld4A2DS64Float:
3247 Opc = NVPTX::TLD4_A_2D_S32_F32;
3249 case NVPTXISD::Tld4R2DU64Float:
3250 Opc = NVPTX::TLD4_R_2D_U32_F32;
3252 case NVPTXISD::Tld4G2DU64Float:
3253 Opc = NVPTX::TLD4_G_2D_U32_F32;
3255 case NVPTXISD::Tld4B2DU64Float:
3256 Opc = NVPTX::TLD4_B_2D_U32_F32;
3258 case NVPTXISD::Tld4A2DU64Float:
3259 Opc = NVPTX::TLD4_A_2D_U32_F32;
3261 case NVPTXISD::TexUnified1DFloatS32:
3262 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3264 case NVPTXISD::TexUnified1DFloatFloat:
3265 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3267 case NVPTXISD::TexUnified1DFloatFloatLevel:
3268 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3270 case NVPTXISD::TexUnified1DFloatFloatGrad:
3271 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3273 case NVPTXISD::TexUnified1DS32S32:
3274 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3276 case NVPTXISD::TexUnified1DS32Float:
3277 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3279 case NVPTXISD::TexUnified1DS32FloatLevel:
3280 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3282 case NVPTXISD::TexUnified1DS32FloatGrad:
3283 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3285 case NVPTXISD::TexUnified1DU32S32:
3286 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3288 case NVPTXISD::TexUnified1DU32Float:
3289 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3291 case NVPTXISD::TexUnified1DU32FloatLevel:
3292 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3294 case NVPTXISD::TexUnified1DU32FloatGrad:
3295 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3297 case NVPTXISD::TexUnified1DArrayFloatS32:
3298 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3300 case NVPTXISD::TexUnified1DArrayFloatFloat:
3301 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3303 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3304 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3306 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3307 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3309 case NVPTXISD::TexUnified1DArrayS32S32:
3310 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3312 case NVPTXISD::TexUnified1DArrayS32Float:
3313 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3315 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3316 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3318 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3319 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3321 case NVPTXISD::TexUnified1DArrayU32S32:
3322 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3324 case NVPTXISD::TexUnified1DArrayU32Float:
3325 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3327 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3328 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3330 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3331 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3333 case NVPTXISD::TexUnified2DFloatS32:
3334 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3336 case NVPTXISD::TexUnified2DFloatFloat:
3337 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3339 case NVPTXISD::TexUnified2DFloatFloatLevel:
3340 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3342 case NVPTXISD::TexUnified2DFloatFloatGrad:
3343 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3345 case NVPTXISD::TexUnified2DS32S32:
3346 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3348 case NVPTXISD::TexUnified2DS32Float:
3349 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3351 case NVPTXISD::TexUnified2DS32FloatLevel:
3352 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3354 case NVPTXISD::TexUnified2DS32FloatGrad:
3355 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3357 case NVPTXISD::TexUnified2DU32S32:
3358 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3360 case NVPTXISD::TexUnified2DU32Float:
3361 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3363 case NVPTXISD::TexUnified2DU32FloatLevel:
3364 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3366 case NVPTXISD::TexUnified2DU32FloatGrad:
3367 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3369 case NVPTXISD::TexUnified2DArrayFloatS32:
3370 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3372 case NVPTXISD::TexUnified2DArrayFloatFloat:
3373 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3375 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3376 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3378 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3379 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3381 case NVPTXISD::TexUnified2DArrayS32S32:
3382 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3384 case NVPTXISD::TexUnified2DArrayS32Float:
3385 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3387 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3388 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3390 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3391 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3393 case NVPTXISD::TexUnified2DArrayU32S32:
3394 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3396 case NVPTXISD::TexUnified2DArrayU32Float:
3397 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3399 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3400 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3402 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3403 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3405 case NVPTXISD::TexUnified3DFloatS32:
3406 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3408 case NVPTXISD::TexUnified3DFloatFloat:
3409 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3411 case NVPTXISD::TexUnified3DFloatFloatLevel:
3412 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3414 case NVPTXISD::TexUnified3DFloatFloatGrad:
3415 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3417 case NVPTXISD::TexUnified3DS32S32:
3418 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3420 case NVPTXISD::TexUnified3DS32Float:
3421 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3423 case NVPTXISD::TexUnified3DS32FloatLevel:
3424 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3426 case NVPTXISD::TexUnified3DS32FloatGrad:
3427 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3429 case NVPTXISD::TexUnified3DU32S32:
3430 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3432 case NVPTXISD::TexUnified3DU32Float:
3433 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3435 case NVPTXISD::TexUnified3DU32FloatLevel:
3436 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3438 case NVPTXISD::TexUnified3DU32FloatGrad:
3439 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3441 case NVPTXISD::TexUnifiedCubeFloatFloat:
3442 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3444 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3445 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3447 case NVPTXISD::TexUnifiedCubeS32Float:
3448 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3450 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3451 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3453 case NVPTXISD::TexUnifiedCubeU32Float:
3454 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3456 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3457 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3459 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3460 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3462 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3463 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3465 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3466 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3468 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3469 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3471 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3472 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3474 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3475 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3477 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3478 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3480 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3481 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3483 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3484 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3486 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3487 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3489 case NVPTXISD::Tld4UnifiedR2DS64Float:
3490 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3492 case NVPTXISD::Tld4UnifiedG2DS64Float:
3493 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3495 case NVPTXISD::Tld4UnifiedB2DS64Float:
3496 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3498 case NVPTXISD::Tld4UnifiedA2DS64Float:
3499 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3501 case NVPTXISD::Tld4UnifiedR2DU64Float:
3502 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3504 case NVPTXISD::Tld4UnifiedG2DU64Float:
3505 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3507 case NVPTXISD::Tld4UnifiedB2DU64Float:
3508 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3510 case NVPTXISD::Tld4UnifiedA2DU64Float:
3511 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3515 // Copy over operands
3516 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3517 Ops.push_back(N->getOperand(i));
3520 Ops.push_back(Chain);
3521 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3525 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3526 SDValue Chain = N->getOperand(0);
3527 SDValue TexHandle = N->getOperand(1);
3528 SDNode *Ret = nullptr;
3530 SmallVector<SDValue, 8> Ops;
3531 switch (N->getOpcode()) {
3532 default: return nullptr;
3533 case NVPTXISD::Suld1DI8Clamp:
3534 Opc = NVPTX::SULD_1D_I8_CLAMP;
3535 Ops.push_back(TexHandle);
3536 Ops.push_back(N->getOperand(2));
3537 Ops.push_back(Chain);
3539 case NVPTXISD::Suld1DI16Clamp:
3540 Opc = NVPTX::SULD_1D_I16_CLAMP;
3541 Ops.push_back(TexHandle);
3542 Ops.push_back(N->getOperand(2));
3543 Ops.push_back(Chain);
3545 case NVPTXISD::Suld1DI32Clamp:
3546 Opc = NVPTX::SULD_1D_I32_CLAMP;
3547 Ops.push_back(TexHandle);
3548 Ops.push_back(N->getOperand(2));
3549 Ops.push_back(Chain);
3551 case NVPTXISD::Suld1DI64Clamp:
3552 Opc = NVPTX::SULD_1D_I64_CLAMP;
3553 Ops.push_back(TexHandle);
3554 Ops.push_back(N->getOperand(2));
3555 Ops.push_back(Chain);
3557 case NVPTXISD::Suld1DV2I8Clamp:
3558 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3559 Ops.push_back(TexHandle);
3560 Ops.push_back(N->getOperand(2));
3561 Ops.push_back(Chain);
3563 case NVPTXISD::Suld1DV2I16Clamp:
3564 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3565 Ops.push_back(TexHandle);
3566 Ops.push_back(N->getOperand(2));
3567 Ops.push_back(Chain);
3569 case NVPTXISD::Suld1DV2I32Clamp:
3570 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3571 Ops.push_back(TexHandle);
3572 Ops.push_back(N->getOperand(2));
3573 Ops.push_back(Chain);
3575 case NVPTXISD::Suld1DV2I64Clamp:
3576 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3577 Ops.push_back(TexHandle);
3578 Ops.push_back(N->getOperand(2));
3579 Ops.push_back(Chain);
3581 case NVPTXISD::Suld1DV4I8Clamp:
3582 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3583 Ops.push_back(TexHandle);
3584 Ops.push_back(N->getOperand(2));
3585 Ops.push_back(Chain);
3587 case NVPTXISD::Suld1DV4I16Clamp:
3588 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3589 Ops.push_back(TexHandle);
3590 Ops.push_back(N->getOperand(2));
3591 Ops.push_back(Chain);
3593 case NVPTXISD::Suld1DV4I32Clamp:
3594 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3595 Ops.push_back(TexHandle);
3596 Ops.push_back(N->getOperand(2));
3597 Ops.push_back(Chain);
3599 case NVPTXISD::Suld1DArrayI8Clamp:
3600 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3601 Ops.push_back(TexHandle);
3602 Ops.push_back(N->getOperand(2));
3603 Ops.push_back(N->getOperand(3));
3604 Ops.push_back(Chain);
3606 case NVPTXISD::Suld1DArrayI16Clamp:
3607 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3608 Ops.push_back(TexHandle);
3609 Ops.push_back(N->getOperand(2));
3610 Ops.push_back(N->getOperand(3));
3611 Ops.push_back(Chain);
3613 case NVPTXISD::Suld1DArrayI32Clamp:
3614 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3615 Ops.push_back(TexHandle);
3616 Ops.push_back(N->getOperand(2));
3617 Ops.push_back(N->getOperand(3));
3618 Ops.push_back(Chain);
3620 case NVPTXISD::Suld1DArrayI64Clamp:
3621 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3622 Ops.push_back(TexHandle);
3623 Ops.push_back(N->getOperand(2));
3624 Ops.push_back(N->getOperand(3));
3625 Ops.push_back(Chain);
3627 case NVPTXISD::Suld1DArrayV2I8Clamp:
3628 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3629 Ops.push_back(TexHandle);
3630 Ops.push_back(N->getOperand(2));
3631 Ops.push_back(N->getOperand(3));
3632 Ops.push_back(Chain);
3634 case NVPTXISD::Suld1DArrayV2I16Clamp:
3635 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3636 Ops.push_back(TexHandle);
3637 Ops.push_back(N->getOperand(2));
3638 Ops.push_back(N->getOperand(3));
3639 Ops.push_back(Chain);
3641 case NVPTXISD::Suld1DArrayV2I32Clamp:
3642 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3643 Ops.push_back(TexHandle);
3644 Ops.push_back(N->getOperand(2));
3645 Ops.push_back(N->getOperand(3));
3646 Ops.push_back(Chain);
3648 case NVPTXISD::Suld1DArrayV2I64Clamp:
3649 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3650 Ops.push_back(TexHandle);
3651 Ops.push_back(N->getOperand(2));
3652 Ops.push_back(N->getOperand(3));
3653 Ops.push_back(Chain);
3655 case NVPTXISD::Suld1DArrayV4I8Clamp:
3656 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3657 Ops.push_back(TexHandle);
3658 Ops.push_back(N->getOperand(2));
3659 Ops.push_back(N->getOperand(3));
3660 Ops.push_back(Chain);
3662 case NVPTXISD::Suld1DArrayV4I16Clamp:
3663 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3664 Ops.push_back(TexHandle);
3665 Ops.push_back(N->getOperand(2));
3666 Ops.push_back(N->getOperand(3));
3667 Ops.push_back(Chain);
3669 case NVPTXISD::Suld1DArrayV4I32Clamp:
3670 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3671 Ops.push_back(TexHandle);
3672 Ops.push_back(N->getOperand(2));
3673 Ops.push_back(N->getOperand(3));
3674 Ops.push_back(Chain);
3676 case NVPTXISD::Suld2DI8Clamp:
3677 Opc = NVPTX::SULD_2D_I8_CLAMP;
3678 Ops.push_back(TexHandle);
3679 Ops.push_back(N->getOperand(2));
3680 Ops.push_back(N->getOperand(3));
3681 Ops.push_back(Chain);
3683 case NVPTXISD::Suld2DI16Clamp:
3684 Opc = NVPTX::SULD_2D_I16_CLAMP;
3685 Ops.push_back(TexHandle);
3686 Ops.push_back(N->getOperand(2));
3687 Ops.push_back(N->getOperand(3));
3688 Ops.push_back(Chain);
3690 case NVPTXISD::Suld2DI32Clamp:
3691 Opc = NVPTX::SULD_2D_I32_CLAMP;
3692 Ops.push_back(TexHandle);
3693 Ops.push_back(N->getOperand(2));
3694 Ops.push_back(N->getOperand(3));
3695 Ops.push_back(Chain);
3697 case NVPTXISD::Suld2DI64Clamp:
3698 Opc = NVPTX::SULD_2D_I64_CLAMP;
3699 Ops.push_back(TexHandle);
3700 Ops.push_back(N->getOperand(2));
3701 Ops.push_back(N->getOperand(3));
3702 Ops.push_back(Chain);
3704 case NVPTXISD::Suld2DV2I8Clamp:
3705 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3706 Ops.push_back(TexHandle);
3707 Ops.push_back(N->getOperand(2));
3708 Ops.push_back(N->getOperand(3));
3709 Ops.push_back(Chain);
3711 case NVPTXISD::Suld2DV2I16Clamp:
3712 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3713 Ops.push_back(TexHandle);
3714 Ops.push_back(N->getOperand(2));
3715 Ops.push_back(N->getOperand(3));
3716 Ops.push_back(Chain);
3718 case NVPTXISD::Suld2DV2I32Clamp:
3719 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3720 Ops.push_back(TexHandle);
3721 Ops.push_back(N->getOperand(2));
3722 Ops.push_back(N->getOperand(3));
3723 Ops.push_back(Chain);
3725 case NVPTXISD::Suld2DV2I64Clamp:
3726 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3727 Ops.push_back(TexHandle);
3728 Ops.push_back(N->getOperand(2));
3729 Ops.push_back(N->getOperand(3));
3730 Ops.push_back(Chain);
3732 case NVPTXISD::Suld2DV4I8Clamp:
3733 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3734 Ops.push_back(TexHandle);
3735 Ops.push_back(N->getOperand(2));
3736 Ops.push_back(N->getOperand(3));
3737 Ops.push_back(Chain);
3739 case NVPTXISD::Suld2DV4I16Clamp:
3740 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3741 Ops.push_back(TexHandle);
3742 Ops.push_back(N->getOperand(2));
3743 Ops.push_back(N->getOperand(3));
3744 Ops.push_back(Chain);
3746 case NVPTXISD::Suld2DV4I32Clamp:
3747 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3748 Ops.push_back(TexHandle);
3749 Ops.push_back(N->getOperand(2));
3750 Ops.push_back(N->getOperand(3));
3751 Ops.push_back(Chain);
3753 case NVPTXISD::Suld2DArrayI8Clamp:
3754 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3755 Ops.push_back(TexHandle);
3756 Ops.push_back(N->getOperand(2));
3757 Ops.push_back(N->getOperand(3));
3758 Ops.push_back(N->getOperand(4));
3759 Ops.push_back(Chain);
3761 case NVPTXISD::Suld2DArrayI16Clamp:
3762 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3763 Ops.push_back(TexHandle);
3764 Ops.push_back(N->getOperand(2));
3765 Ops.push_back(N->getOperand(3));
3766 Ops.push_back(N->getOperand(4));
3767 Ops.push_back(Chain);
3769 case NVPTXISD::Suld2DArrayI32Clamp:
3770 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3771 Ops.push_back(TexHandle);
3772 Ops.push_back(N->getOperand(2));
3773 Ops.push_back(N->getOperand(3));
3774 Ops.push_back(N->getOperand(4));
3775 Ops.push_back(Chain);
3777 case NVPTXISD::Suld2DArrayI64Clamp:
3778 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3779 Ops.push_back(TexHandle);
3780 Ops.push_back(N->getOperand(2));
3781 Ops.push_back(N->getOperand(3));
3782 Ops.push_back(N->getOperand(4));
3783 Ops.push_back(Chain);
3785 case NVPTXISD::Suld2DArrayV2I8Clamp:
3786 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3787 Ops.push_back(TexHandle);
3788 Ops.push_back(N->getOperand(2));
3789 Ops.push_back(N->getOperand(3));
3790 Ops.push_back(N->getOperand(4));
3791 Ops.push_back(Chain);
3793 case NVPTXISD::Suld2DArrayV2I16Clamp:
3794 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(N->getOperand(4));
3799 Ops.push_back(Chain);
3801 case NVPTXISD::Suld2DArrayV2I32Clamp:
3802 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3803 Ops.push_back(TexHandle);
3804 Ops.push_back(N->getOperand(2));
3805 Ops.push_back(N->getOperand(3));
3806 Ops.push_back(N->getOperand(4));
3807 Ops.push_back(Chain);
3809 case NVPTXISD::Suld2DArrayV2I64Clamp:
3810 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3811 Ops.push_back(TexHandle);
3812 Ops.push_back(N->getOperand(2));
3813 Ops.push_back(N->getOperand(3));
3814 Ops.push_back(N->getOperand(4));
3815 Ops.push_back(Chain);
3817 case NVPTXISD::Suld2DArrayV4I8Clamp:
3818 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3819 Ops.push_back(TexHandle);
3820 Ops.push_back(N->getOperand(2));
3821 Ops.push_back(N->getOperand(3));
3822 Ops.push_back(N->getOperand(4));
3823 Ops.push_back(Chain);
3825 case NVPTXISD::Suld2DArrayV4I16Clamp:
3826 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3827 Ops.push_back(TexHandle);
3828 Ops.push_back(N->getOperand(2));
3829 Ops.push_back(N->getOperand(3));
3830 Ops.push_back(N->getOperand(4));
3831 Ops.push_back(Chain);
3833 case NVPTXISD::Suld2DArrayV4I32Clamp:
3834 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3835 Ops.push_back(TexHandle);
3836 Ops.push_back(N->getOperand(2));
3837 Ops.push_back(N->getOperand(3));
3838 Ops.push_back(N->getOperand(4));
3839 Ops.push_back(Chain);
3841 case NVPTXISD::Suld3DI8Clamp:
3842 Opc = NVPTX::SULD_3D_I8_CLAMP;
3843 Ops.push_back(TexHandle);
3844 Ops.push_back(N->getOperand(2));
3845 Ops.push_back(N->getOperand(3));
3846 Ops.push_back(N->getOperand(4));
3847 Ops.push_back(Chain);
3849 case NVPTXISD::Suld3DI16Clamp:
3850 Opc = NVPTX::SULD_3D_I16_CLAMP;
3851 Ops.push_back(TexHandle);
3852 Ops.push_back(N->getOperand(2));
3853 Ops.push_back(N->getOperand(3));
3854 Ops.push_back(N->getOperand(4));
3855 Ops.push_back(Chain);
3857 case NVPTXISD::Suld3DI32Clamp:
3858 Opc = NVPTX::SULD_3D_I32_CLAMP;
3859 Ops.push_back(TexHandle);
3860 Ops.push_back(N->getOperand(2));
3861 Ops.push_back(N->getOperand(3));
3862 Ops.push_back(N->getOperand(4));
3863 Ops.push_back(Chain);
3865 case NVPTXISD::Suld3DI64Clamp:
3866 Opc = NVPTX::SULD_3D_I64_CLAMP;
3867 Ops.push_back(TexHandle);
3868 Ops.push_back(N->getOperand(2));
3869 Ops.push_back(N->getOperand(3));
3870 Ops.push_back(N->getOperand(4));
3871 Ops.push_back(Chain);
3873 case NVPTXISD::Suld3DV2I8Clamp:
3874 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3875 Ops.push_back(TexHandle);
3876 Ops.push_back(N->getOperand(2));
3877 Ops.push_back(N->getOperand(3));
3878 Ops.push_back(N->getOperand(4));
3879 Ops.push_back(Chain);
3881 case NVPTXISD::Suld3DV2I16Clamp:
3882 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3883 Ops.push_back(TexHandle);
3884 Ops.push_back(N->getOperand(2));
3885 Ops.push_back(N->getOperand(3));
3886 Ops.push_back(N->getOperand(4));
3887 Ops.push_back(Chain);
3889 case NVPTXISD::Suld3DV2I32Clamp:
3890 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3891 Ops.push_back(TexHandle);
3892 Ops.push_back(N->getOperand(2));
3893 Ops.push_back(N->getOperand(3));
3894 Ops.push_back(N->getOperand(4));
3895 Ops.push_back(Chain);
3897 case NVPTXISD::Suld3DV2I64Clamp:
3898 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3899 Ops.push_back(TexHandle);
3900 Ops.push_back(N->getOperand(2));
3901 Ops.push_back(N->getOperand(3));
3902 Ops.push_back(N->getOperand(4));
3903 Ops.push_back(Chain);
3905 case NVPTXISD::Suld3DV4I8Clamp:
3906 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3907 Ops.push_back(TexHandle);
3908 Ops.push_back(N->getOperand(2));
3909 Ops.push_back(N->getOperand(3));
3910 Ops.push_back(N->getOperand(4));
3911 Ops.push_back(Chain);
3913 case NVPTXISD::Suld3DV4I16Clamp:
3914 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3915 Ops.push_back(TexHandle);
3916 Ops.push_back(N->getOperand(2));
3917 Ops.push_back(N->getOperand(3));
3918 Ops.push_back(N->getOperand(4));
3919 Ops.push_back(Chain);
3921 case NVPTXISD::Suld3DV4I32Clamp:
3922 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3923 Ops.push_back(TexHandle);
3924 Ops.push_back(N->getOperand(2));
3925 Ops.push_back(N->getOperand(3));
3926 Ops.push_back(N->getOperand(4));
3927 Ops.push_back(Chain);
3929 case NVPTXISD::Suld1DI8Trap:
3930 Opc = NVPTX::SULD_1D_I8_TRAP;
3931 Ops.push_back(TexHandle);
3932 Ops.push_back(N->getOperand(2));
3933 Ops.push_back(Chain);
3935 case NVPTXISD::Suld1DI16Trap:
3936 Opc = NVPTX::SULD_1D_I16_TRAP;
3937 Ops.push_back(TexHandle);
3938 Ops.push_back(N->getOperand(2));
3939 Ops.push_back(Chain);
3941 case NVPTXISD::Suld1DI32Trap:
3942 Opc = NVPTX::SULD_1D_I32_TRAP;
3943 Ops.push_back(TexHandle);
3944 Ops.push_back(N->getOperand(2));
3945 Ops.push_back(Chain);
3947 case NVPTXISD::Suld1DI64Trap:
3948 Opc = NVPTX::SULD_1D_I64_TRAP;
3949 Ops.push_back(TexHandle);
3950 Ops.push_back(N->getOperand(2));
3951 Ops.push_back(Chain);
3953 case NVPTXISD::Suld1DV2I8Trap:
3954 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3955 Ops.push_back(TexHandle);
3956 Ops.push_back(N->getOperand(2));
3957 Ops.push_back(Chain);
3959 case NVPTXISD::Suld1DV2I16Trap:
3960 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3961 Ops.push_back(TexHandle);
3962 Ops.push_back(N->getOperand(2));
3963 Ops.push_back(Chain);
3965 case NVPTXISD::Suld1DV2I32Trap:
3966 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3967 Ops.push_back(TexHandle);
3968 Ops.push_back(N->getOperand(2));
3969 Ops.push_back(Chain);
3971 case NVPTXISD::Suld1DV2I64Trap:
3972 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3973 Ops.push_back(TexHandle);
3974 Ops.push_back(N->getOperand(2));
3975 Ops.push_back(Chain);
3977 case NVPTXISD::Suld1DV4I8Trap:
3978 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3979 Ops.push_back(TexHandle);
3980 Ops.push_back(N->getOperand(2));
3981 Ops.push_back(Chain);
3983 case NVPTXISD::Suld1DV4I16Trap:
3984 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3985 Ops.push_back(TexHandle);
3986 Ops.push_back(N->getOperand(2));
3987 Ops.push_back(Chain);
3989 case NVPTXISD::Suld1DV4I32Trap:
3990 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3991 Ops.push_back(TexHandle);
3992 Ops.push_back(N->getOperand(2));
3993 Ops.push_back(Chain);
3995 case NVPTXISD::Suld1DArrayI8Trap:
3996 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3997 Ops.push_back(TexHandle);
3998 Ops.push_back(N->getOperand(2));
3999 Ops.push_back(N->getOperand(3));
4000 Ops.push_back(Chain);
4002 case NVPTXISD::Suld1DArrayI16Trap:
4003 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4004 Ops.push_back(TexHandle);
4005 Ops.push_back(N->getOperand(2));
4006 Ops.push_back(N->getOperand(3));
4007 Ops.push_back(Chain);
4009 case NVPTXISD::Suld1DArrayI32Trap:
4010 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4011 Ops.push_back(TexHandle);
4012 Ops.push_back(N->getOperand(2));
4013 Ops.push_back(N->getOperand(3));
4014 Ops.push_back(Chain);
4016 case NVPTXISD::Suld1DArrayI64Trap:
4017 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4018 Ops.push_back(TexHandle);
4019 Ops.push_back(N->getOperand(2));
4020 Ops.push_back(N->getOperand(3));
4021 Ops.push_back(Chain);
4023 case NVPTXISD::Suld1DArrayV2I8Trap:
4024 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4025 Ops.push_back(TexHandle);
4026 Ops.push_back(N->getOperand(2));
4027 Ops.push_back(N->getOperand(3));
4028 Ops.push_back(Chain);
4030 case NVPTXISD::Suld1DArrayV2I16Trap:
4031 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4032 Ops.push_back(TexHandle);
4033 Ops.push_back(N->getOperand(2));
4034 Ops.push_back(N->getOperand(3));
4035 Ops.push_back(Chain);
4037 case NVPTXISD::Suld1DArrayV2I32Trap:
4038 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4039 Ops.push_back(TexHandle);
4040 Ops.push_back(N->getOperand(2));
4041 Ops.push_back(N->getOperand(3));
4042 Ops.push_back(Chain);
4044 case NVPTXISD::Suld1DArrayV2I64Trap:
4045 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4046 Ops.push_back(TexHandle);
4047 Ops.push_back(N->getOperand(2));
4048 Ops.push_back(N->getOperand(3));
4049 Ops.push_back(Chain);
4051 case NVPTXISD::Suld1DArrayV4I8Trap:
4052 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(N->getOperand(3));
4056 Ops.push_back(Chain);
4058 case NVPTXISD::Suld1DArrayV4I16Trap:
4059 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4060 Ops.push_back(TexHandle);
4061 Ops.push_back(N->getOperand(2));
4062 Ops.push_back(N->getOperand(3));
4063 Ops.push_back(Chain);
4065 case NVPTXISD::Suld1DArrayV4I32Trap:
4066 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4067 Ops.push_back(TexHandle);
4068 Ops.push_back(N->getOperand(2));
4069 Ops.push_back(N->getOperand(3));
4070 Ops.push_back(Chain);
4072 case NVPTXISD::Suld2DI8Trap:
4073 Opc = NVPTX::SULD_2D_I8_TRAP;
4074 Ops.push_back(TexHandle);
4075 Ops.push_back(N->getOperand(2));
4076 Ops.push_back(N->getOperand(3));
4077 Ops.push_back(Chain);
4079 case NVPTXISD::Suld2DI16Trap:
4080 Opc = NVPTX::SULD_2D_I16_TRAP;
4081 Ops.push_back(TexHandle);
4082 Ops.push_back(N->getOperand(2));
4083 Ops.push_back(N->getOperand(3));
4084 Ops.push_back(Chain);
4086 case NVPTXISD::Suld2DI32Trap:
4087 Opc = NVPTX::SULD_2D_I32_TRAP;
4088 Ops.push_back(TexHandle);
4089 Ops.push_back(N->getOperand(2));
4090 Ops.push_back(N->getOperand(3));
4091 Ops.push_back(Chain);
4093 case NVPTXISD::Suld2DI64Trap:
4094 Opc = NVPTX::SULD_2D_I64_TRAP;
4095 Ops.push_back(TexHandle);
4096 Ops.push_back(N->getOperand(2));
4097 Ops.push_back(N->getOperand(3));
4098 Ops.push_back(Chain);
4100 case NVPTXISD::Suld2DV2I8Trap:
4101 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4102 Ops.push_back(TexHandle);
4103 Ops.push_back(N->getOperand(2));
4104 Ops.push_back(N->getOperand(3));
4105 Ops.push_back(Chain);
4107 case NVPTXISD::Suld2DV2I16Trap:
4108 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4109 Ops.push_back(TexHandle);
4110 Ops.push_back(N->getOperand(2));
4111 Ops.push_back(N->getOperand(3));
4112 Ops.push_back(Chain);
4114 case NVPTXISD::Suld2DV2I32Trap:
4115 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4116 Ops.push_back(TexHandle);
4117 Ops.push_back(N->getOperand(2));
4118 Ops.push_back(N->getOperand(3));
4119 Ops.push_back(Chain);
4121 case NVPTXISD::Suld2DV2I64Trap:
4122 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4123 Ops.push_back(TexHandle);
4124 Ops.push_back(N->getOperand(2));
4125 Ops.push_back(N->getOperand(3));
4126 Ops.push_back(Chain);
4128 case NVPTXISD::Suld2DV4I8Trap:
4129 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4130 Ops.push_back(TexHandle);
4131 Ops.push_back(N->getOperand(2));
4132 Ops.push_back(N->getOperand(3));
4133 Ops.push_back(Chain);
4135 case NVPTXISD::Suld2DV4I16Trap:
4136 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4137 Ops.push_back(TexHandle);
4138 Ops.push_back(N->getOperand(2));
4139 Ops.push_back(N->getOperand(3));
4140 Ops.push_back(Chain);
4142 case NVPTXISD::Suld2DV4I32Trap:
4143 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4144 Ops.push_back(TexHandle);
4145 Ops.push_back(N->getOperand(2));
4146 Ops.push_back(N->getOperand(3));
4147 Ops.push_back(Chain);
4149 case NVPTXISD::Suld2DArrayI8Trap:
4150 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4151 Ops.push_back(TexHandle);
4152 Ops.push_back(N->getOperand(2));
4153 Ops.push_back(N->getOperand(3));
4154 Ops.push_back(N->getOperand(4));
4155 Ops.push_back(Chain);
4157 case NVPTXISD::Suld2DArrayI16Trap:
4158 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4159 Ops.push_back(TexHandle);
4160 Ops.push_back(N->getOperand(2));
4161 Ops.push_back(N->getOperand(3));
4162 Ops.push_back(N->getOperand(4));
4163 Ops.push_back(Chain);
4165 case NVPTXISD::Suld2DArrayI32Trap:
4166 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4167 Ops.push_back(TexHandle);
4168 Ops.push_back(N->getOperand(2));
4169 Ops.push_back(N->getOperand(3));
4170 Ops.push_back(N->getOperand(4));
4171 Ops.push_back(Chain);
4173 case NVPTXISD::Suld2DArrayI64Trap:
4174 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4175 Ops.push_back(TexHandle);
4176 Ops.push_back(N->getOperand(2));
4177 Ops.push_back(N->getOperand(3));
4178 Ops.push_back(N->getOperand(4));
4179 Ops.push_back(Chain);
4181 case NVPTXISD::Suld2DArrayV2I8Trap:
4182 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4183 Ops.push_back(TexHandle);
4184 Ops.push_back(N->getOperand(2));
4185 Ops.push_back(N->getOperand(3));
4186 Ops.push_back(N->getOperand(4));
4187 Ops.push_back(Chain);
4189 case NVPTXISD::Suld2DArrayV2I16Trap:
4190 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(N->getOperand(4));
4195 Ops.push_back(Chain);
4197 case NVPTXISD::Suld2DArrayV2I32Trap:
4198 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4199 Ops.push_back(TexHandle);
4200 Ops.push_back(N->getOperand(2));
4201 Ops.push_back(N->getOperand(3));
4202 Ops.push_back(N->getOperand(4));
4203 Ops.push_back(Chain);
4205 case NVPTXISD::Suld2DArrayV2I64Trap:
4206 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4207 Ops.push_back(TexHandle);
4208 Ops.push_back(N->getOperand(2));
4209 Ops.push_back(N->getOperand(3));
4210 Ops.push_back(N->getOperand(4));
4211 Ops.push_back(Chain);
4213 case NVPTXISD::Suld2DArrayV4I8Trap:
4214 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4215 Ops.push_back(TexHandle);
4216 Ops.push_back(N->getOperand(2));
4217 Ops.push_back(N->getOperand(3));
4218 Ops.push_back(N->getOperand(4));
4219 Ops.push_back(Chain);
4221 case NVPTXISD::Suld2DArrayV4I16Trap:
4222 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4223 Ops.push_back(TexHandle);
4224 Ops.push_back(N->getOperand(2));
4225 Ops.push_back(N->getOperand(3));
4226 Ops.push_back(N->getOperand(4));
4227 Ops.push_back(Chain);
4229 case NVPTXISD::Suld2DArrayV4I32Trap:
4230 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4231 Ops.push_back(TexHandle);
4232 Ops.push_back(N->getOperand(2));
4233 Ops.push_back(N->getOperand(3));
4234 Ops.push_back(N->getOperand(4));
4235 Ops.push_back(Chain);
4237 case NVPTXISD::Suld3DI8Trap:
4238 Opc = NVPTX::SULD_3D_I8_TRAP;
4239 Ops.push_back(TexHandle);
4240 Ops.push_back(N->getOperand(2));
4241 Ops.push_back(N->getOperand(3));
4242 Ops.push_back(N->getOperand(4));
4243 Ops.push_back(Chain);
4245 case NVPTXISD::Suld3DI16Trap:
4246 Opc = NVPTX::SULD_3D_I16_TRAP;
4247 Ops.push_back(TexHandle);
4248 Ops.push_back(N->getOperand(2));
4249 Ops.push_back(N->getOperand(3));
4250 Ops.push_back(N->getOperand(4));
4251 Ops.push_back(Chain);
4253 case NVPTXISD::Suld3DI32Trap:
4254 Opc = NVPTX::SULD_3D_I32_TRAP;
4255 Ops.push_back(TexHandle);
4256 Ops.push_back(N->getOperand(2));
4257 Ops.push_back(N->getOperand(3));
4258 Ops.push_back(N->getOperand(4));
4259 Ops.push_back(Chain);
4261 case NVPTXISD::Suld3DI64Trap:
4262 Opc = NVPTX::SULD_3D_I64_TRAP;
4263 Ops.push_back(TexHandle);
4264 Ops.push_back(N->getOperand(2));
4265 Ops.push_back(N->getOperand(3));
4266 Ops.push_back(N->getOperand(4));
4267 Ops.push_back(Chain);
4269 case NVPTXISD::Suld3DV2I8Trap:
4270 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4271 Ops.push_back(TexHandle);
4272 Ops.push_back(N->getOperand(2));
4273 Ops.push_back(N->getOperand(3));
4274 Ops.push_back(N->getOperand(4));
4275 Ops.push_back(Chain);
4277 case NVPTXISD::Suld3DV2I16Trap:
4278 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4279 Ops.push_back(TexHandle);
4280 Ops.push_back(N->getOperand(2));
4281 Ops.push_back(N->getOperand(3));
4282 Ops.push_back(N->getOperand(4));
4283 Ops.push_back(Chain);
4285 case NVPTXISD::Suld3DV2I32Trap:
4286 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4287 Ops.push_back(TexHandle);
4288 Ops.push_back(N->getOperand(2));
4289 Ops.push_back(N->getOperand(3));
4290 Ops.push_back(N->getOperand(4));
4291 Ops.push_back(Chain);
4293 case NVPTXISD::Suld3DV2I64Trap:
4294 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4295 Ops.push_back(TexHandle);
4296 Ops.push_back(N->getOperand(2));
4297 Ops.push_back(N->getOperand(3));
4298 Ops.push_back(N->getOperand(4));
4299 Ops.push_back(Chain);
4301 case NVPTXISD::Suld3DV4I8Trap:
4302 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4303 Ops.push_back(TexHandle);
4304 Ops.push_back(N->getOperand(2));
4305 Ops.push_back(N->getOperand(3));
4306 Ops.push_back(N->getOperand(4));
4307 Ops.push_back(Chain);
4309 case NVPTXISD::Suld3DV4I16Trap:
4310 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4311 Ops.push_back(TexHandle);
4312 Ops.push_back(N->getOperand(2));
4313 Ops.push_back(N->getOperand(3));
4314 Ops.push_back(N->getOperand(4));
4315 Ops.push_back(Chain);
4317 case NVPTXISD::Suld3DV4I32Trap:
4318 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4319 Ops.push_back(TexHandle);
4320 Ops.push_back(N->getOperand(2));
4321 Ops.push_back(N->getOperand(3));
4322 Ops.push_back(N->getOperand(4));
4323 Ops.push_back(Chain);
4325 case NVPTXISD::Suld1DI8Zero:
4326 Opc = NVPTX::SULD_1D_I8_ZERO;
4327 Ops.push_back(TexHandle);
4328 Ops.push_back(N->getOperand(2));
4329 Ops.push_back(Chain);
4331 case NVPTXISD::Suld1DI16Zero:
4332 Opc = NVPTX::SULD_1D_I16_ZERO;
4333 Ops.push_back(TexHandle);
4334 Ops.push_back(N->getOperand(2));
4335 Ops.push_back(Chain);
4337 case NVPTXISD::Suld1DI32Zero:
4338 Opc = NVPTX::SULD_1D_I32_ZERO;
4339 Ops.push_back(TexHandle);
4340 Ops.push_back(N->getOperand(2));
4341 Ops.push_back(Chain);
4343 case NVPTXISD::Suld1DI64Zero:
4344 Opc = NVPTX::SULD_1D_I64_ZERO;
4345 Ops.push_back(TexHandle);
4346 Ops.push_back(N->getOperand(2));
4347 Ops.push_back(Chain);
4349 case NVPTXISD::Suld1DV2I8Zero:
4350 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4351 Ops.push_back(TexHandle);
4352 Ops.push_back(N->getOperand(2));
4353 Ops.push_back(Chain);
4355 case NVPTXISD::Suld1DV2I16Zero:
4356 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4357 Ops.push_back(TexHandle);
4358 Ops.push_back(N->getOperand(2));
4359 Ops.push_back(Chain);
4361 case NVPTXISD::Suld1DV2I32Zero:
4362 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4363 Ops.push_back(TexHandle);
4364 Ops.push_back(N->getOperand(2));
4365 Ops.push_back(Chain);
4367 case NVPTXISD::Suld1DV2I64Zero:
4368 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4369 Ops.push_back(TexHandle);
4370 Ops.push_back(N->getOperand(2));
4371 Ops.push_back(Chain);
4373 case NVPTXISD::Suld1DV4I8Zero:
4374 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4375 Ops.push_back(TexHandle);
4376 Ops.push_back(N->getOperand(2));
4377 Ops.push_back(Chain);
4379 case NVPTXISD::Suld1DV4I16Zero:
4380 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4381 Ops.push_back(TexHandle);
4382 Ops.push_back(N->getOperand(2));
4383 Ops.push_back(Chain);
4385 case NVPTXISD::Suld1DV4I32Zero:
4386 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4387 Ops.push_back(TexHandle);
4388 Ops.push_back(N->getOperand(2));
4389 Ops.push_back(Chain);
4391 case NVPTXISD::Suld1DArrayI8Zero:
4392 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4393 Ops.push_back(TexHandle);
4394 Ops.push_back(N->getOperand(2));
4395 Ops.push_back(N->getOperand(3));
4396 Ops.push_back(Chain);
4398 case NVPTXISD::Suld1DArrayI16Zero:
4399 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4400 Ops.push_back(TexHandle);
4401 Ops.push_back(N->getOperand(2));
4402 Ops.push_back(N->getOperand(3));
4403 Ops.push_back(Chain);
4405 case NVPTXISD::Suld1DArrayI32Zero:
4406 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4407 Ops.push_back(TexHandle);
4408 Ops.push_back(N->getOperand(2));
4409 Ops.push_back(N->getOperand(3));
4410 Ops.push_back(Chain);
4412 case NVPTXISD::Suld1DArrayI64Zero:
4413 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4414 Ops.push_back(TexHandle);
4415 Ops.push_back(N->getOperand(2));
4416 Ops.push_back(N->getOperand(3));
4417 Ops.push_back(Chain);
4419 case NVPTXISD::Suld1DArrayV2I8Zero:
4420 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4421 Ops.push_back(TexHandle);
4422 Ops.push_back(N->getOperand(2));
4423 Ops.push_back(N->getOperand(3));
4424 Ops.push_back(Chain);
4426 case NVPTXISD::Suld1DArrayV2I16Zero:
4427 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4428 Ops.push_back(TexHandle);
4429 Ops.push_back(N->getOperand(2));
4430 Ops.push_back(N->getOperand(3));
4431 Ops.push_back(Chain);
4433 case NVPTXISD::Suld1DArrayV2I32Zero:
4434 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4435 Ops.push_back(TexHandle);
4436 Ops.push_back(N->getOperand(2));
4437 Ops.push_back(N->getOperand(3));
4438 Ops.push_back(Chain);
4440 case NVPTXISD::Suld1DArrayV2I64Zero:
4441 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4442 Ops.push_back(TexHandle);
4443 Ops.push_back(N->getOperand(2));
4444 Ops.push_back(N->getOperand(3));
4445 Ops.push_back(Chain);
4447 case NVPTXISD::Suld1DArrayV4I8Zero:
4448 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(N->getOperand(3));
4452 Ops.push_back(Chain);
4454 case NVPTXISD::Suld1DArrayV4I16Zero:
4455 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4456 Ops.push_back(TexHandle);
4457 Ops.push_back(N->getOperand(2));
4458 Ops.push_back(N->getOperand(3));
4459 Ops.push_back(Chain);
4461 case NVPTXISD::Suld1DArrayV4I32Zero:
4462 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4463 Ops.push_back(TexHandle);
4464 Ops.push_back(N->getOperand(2));
4465 Ops.push_back(N->getOperand(3));
4466 Ops.push_back(Chain);
4468 case NVPTXISD::Suld2DI8Zero:
4469 Opc = NVPTX::SULD_2D_I8_ZERO;
4470 Ops.push_back(TexHandle);
4471 Ops.push_back(N->getOperand(2));
4472 Ops.push_back(N->getOperand(3));
4473 Ops.push_back(Chain);
4475 case NVPTXISD::Suld2DI16Zero:
4476 Opc = NVPTX::SULD_2D_I16_ZERO;
4477 Ops.push_back(TexHandle);
4478 Ops.push_back(N->getOperand(2));
4479 Ops.push_back(N->getOperand(3));
4480 Ops.push_back(Chain);
4482 case NVPTXISD::Suld2DI32Zero:
4483 Opc = NVPTX::SULD_2D_I32_ZERO;
4484 Ops.push_back(TexHandle);
4485 Ops.push_back(N->getOperand(2));
4486 Ops.push_back(N->getOperand(3));
4487 Ops.push_back(Chain);
4489 case NVPTXISD::Suld2DI64Zero:
4490 Opc = NVPTX::SULD_2D_I64_ZERO;
4491 Ops.push_back(TexHandle);
4492 Ops.push_back(N->getOperand(2));
4493 Ops.push_back(N->getOperand(3));
4494 Ops.push_back(Chain);
4496 case NVPTXISD::Suld2DV2I8Zero:
4497 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4498 Ops.push_back(TexHandle);
4499 Ops.push_back(N->getOperand(2));
4500 Ops.push_back(N->getOperand(3));
4501 Ops.push_back(Chain);
4503 case NVPTXISD::Suld2DV2I16Zero:
4504 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4505 Ops.push_back(TexHandle);
4506 Ops.push_back(N->getOperand(2));
4507 Ops.push_back(N->getOperand(3));
4508 Ops.push_back(Chain);
4510 case NVPTXISD::Suld2DV2I32Zero:
4511 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4512 Ops.push_back(TexHandle);
4513 Ops.push_back(N->getOperand(2));
4514 Ops.push_back(N->getOperand(3));
4515 Ops.push_back(Chain);
4517 case NVPTXISD::Suld2DV2I64Zero:
4518 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4519 Ops.push_back(TexHandle);
4520 Ops.push_back(N->getOperand(2));
4521 Ops.push_back(N->getOperand(3));
4522 Ops.push_back(Chain);
4524 case NVPTXISD::Suld2DV4I8Zero:
4525 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4526 Ops.push_back(TexHandle);
4527 Ops.push_back(N->getOperand(2));
4528 Ops.push_back(N->getOperand(3));
4529 Ops.push_back(Chain);
4531 case NVPTXISD::Suld2DV4I16Zero:
4532 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4533 Ops.push_back(TexHandle);
4534 Ops.push_back(N->getOperand(2));
4535 Ops.push_back(N->getOperand(3));
4536 Ops.push_back(Chain);
4538 case NVPTXISD::Suld2DV4I32Zero:
4539 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4540 Ops.push_back(TexHandle);
4541 Ops.push_back(N->getOperand(2));
4542 Ops.push_back(N->getOperand(3));
4543 Ops.push_back(Chain);
4545 case NVPTXISD::Suld2DArrayI8Zero:
4546 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4547 Ops.push_back(TexHandle);
4548 Ops.push_back(N->getOperand(2));
4549 Ops.push_back(N->getOperand(3));
4550 Ops.push_back(N->getOperand(4));
4551 Ops.push_back(Chain);
4553 case NVPTXISD::Suld2DArrayI16Zero:
4554 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4555 Ops.push_back(TexHandle);
4556 Ops.push_back(N->getOperand(2));
4557 Ops.push_back(N->getOperand(3));
4558 Ops.push_back(N->getOperand(4));
4559 Ops.push_back(Chain);
4561 case NVPTXISD::Suld2DArrayI32Zero:
4562 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4563 Ops.push_back(TexHandle);
4564 Ops.push_back(N->getOperand(2));
4565 Ops.push_back(N->getOperand(3));
4566 Ops.push_back(N->getOperand(4));
4567 Ops.push_back(Chain);
4569 case NVPTXISD::Suld2DArrayI64Zero:
4570 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4571 Ops.push_back(TexHandle);
4572 Ops.push_back(N->getOperand(2));
4573 Ops.push_back(N->getOperand(3));
4574 Ops.push_back(N->getOperand(4));
4575 Ops.push_back(Chain);
4577 case NVPTXISD::Suld2DArrayV2I8Zero:
4578 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4579 Ops.push_back(TexHandle);
4580 Ops.push_back(N->getOperand(2));
4581 Ops.push_back(N->getOperand(3));
4582 Ops.push_back(N->getOperand(4));
4583 Ops.push_back(Chain);
4585 case NVPTXISD::Suld2DArrayV2I16Zero:
4586 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(N->getOperand(4));
4591 Ops.push_back(Chain);
4593 case NVPTXISD::Suld2DArrayV2I32Zero:
4594 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4595 Ops.push_back(TexHandle);
4596 Ops.push_back(N->getOperand(2));
4597 Ops.push_back(N->getOperand(3));
4598 Ops.push_back(N->getOperand(4));
4599 Ops.push_back(Chain);
4601 case NVPTXISD::Suld2DArrayV2I64Zero:
4602 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4603 Ops.push_back(TexHandle);
4604 Ops.push_back(N->getOperand(2));
4605 Ops.push_back(N->getOperand(3));
4606 Ops.push_back(N->getOperand(4));
4607 Ops.push_back(Chain);
4609 case NVPTXISD::Suld2DArrayV4I8Zero:
4610 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4611 Ops.push_back(TexHandle);
4612 Ops.push_back(N->getOperand(2));
4613 Ops.push_back(N->getOperand(3));
4614 Ops.push_back(N->getOperand(4));
4615 Ops.push_back(Chain);
4617 case NVPTXISD::Suld2DArrayV4I16Zero:
4618 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4619 Ops.push_back(TexHandle);
4620 Ops.push_back(N->getOperand(2));
4621 Ops.push_back(N->getOperand(3));
4622 Ops.push_back(N->getOperand(4));
4623 Ops.push_back(Chain);
4625 case NVPTXISD::Suld2DArrayV4I32Zero:
4626 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4627 Ops.push_back(TexHandle);
4628 Ops.push_back(N->getOperand(2));
4629 Ops.push_back(N->getOperand(3));
4630 Ops.push_back(N->getOperand(4));
4631 Ops.push_back(Chain);
4633 case NVPTXISD::Suld3DI8Zero:
4634 Opc = NVPTX::SULD_3D_I8_ZERO;
4635 Ops.push_back(TexHandle);
4636 Ops.push_back(N->getOperand(2));
4637 Ops.push_back(N->getOperand(3));
4638 Ops.push_back(N->getOperand(4));
4639 Ops.push_back(Chain);
4641 case NVPTXISD::Suld3DI16Zero:
4642 Opc = NVPTX::SULD_3D_I16_ZERO;
4643 Ops.push_back(TexHandle);
4644 Ops.push_back(N->getOperand(2));
4645 Ops.push_back(N->getOperand(3));
4646 Ops.push_back(N->getOperand(4));
4647 Ops.push_back(Chain);
4649 case NVPTXISD::Suld3DI32Zero:
4650 Opc = NVPTX::SULD_3D_I32_ZERO;
4651 Ops.push_back(TexHandle);
4652 Ops.push_back(N->getOperand(2));
4653 Ops.push_back(N->getOperand(3));
4654 Ops.push_back(N->getOperand(4));
4655 Ops.push_back(Chain);
4657 case NVPTXISD::Suld3DI64Zero:
4658 Opc = NVPTX::SULD_3D_I64_ZERO;
4659 Ops.push_back(TexHandle);
4660 Ops.push_back(N->getOperand(2));
4661 Ops.push_back(N->getOperand(3));
4662 Ops.push_back(N->getOperand(4));
4663 Ops.push_back(Chain);
4665 case NVPTXISD::Suld3DV2I8Zero:
4666 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4667 Ops.push_back(TexHandle);
4668 Ops.push_back(N->getOperand(2));
4669 Ops.push_back(N->getOperand(3));
4670 Ops.push_back(N->getOperand(4));
4671 Ops.push_back(Chain);
4673 case NVPTXISD::Suld3DV2I16Zero:
4674 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4675 Ops.push_back(TexHandle);
4676 Ops.push_back(N->getOperand(2));
4677 Ops.push_back(N->getOperand(3));
4678 Ops.push_back(N->getOperand(4));
4679 Ops.push_back(Chain);
4681 case NVPTXISD::Suld3DV2I32Zero:
4682 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4683 Ops.push_back(TexHandle);
4684 Ops.push_back(N->getOperand(2));
4685 Ops.push_back(N->getOperand(3));
4686 Ops.push_back(N->getOperand(4));
4687 Ops.push_back(Chain);
4689 case NVPTXISD::Suld3DV2I64Zero:
4690 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4691 Ops.push_back(TexHandle);
4692 Ops.push_back(N->getOperand(2));
4693 Ops.push_back(N->getOperand(3));
4694 Ops.push_back(N->getOperand(4));
4695 Ops.push_back(Chain);
4697 case NVPTXISD::Suld3DV4I8Zero:
4698 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4699 Ops.push_back(TexHandle);
4700 Ops.push_back(N->getOperand(2));
4701 Ops.push_back(N->getOperand(3));
4702 Ops.push_back(N->getOperand(4));
4703 Ops.push_back(Chain);
4705 case NVPTXISD::Suld3DV4I16Zero:
4706 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4707 Ops.push_back(TexHandle);
4708 Ops.push_back(N->getOperand(2));
4709 Ops.push_back(N->getOperand(3));
4710 Ops.push_back(N->getOperand(4));
4711 Ops.push_back(Chain);
4713 case NVPTXISD::Suld3DV4I32Zero:
4714 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4715 Ops.push_back(TexHandle);
4716 Ops.push_back(N->getOperand(2));
4717 Ops.push_back(N->getOperand(3));
4718 Ops.push_back(N->getOperand(4));
4719 Ops.push_back(Chain);
4722 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4727 /// SelectBFE - Look for instruction sequences that can be made more efficient
4728 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// The opcode of N is tested against ISD::AND, ISD::SRL and ISD::SRA, and its
/// first two operands are inspected. The shapes the visible code deals with
/// are:
///   - (and (srl|sra val, C), mask)   constant mask, constant shift amount C
///   - (srl|sra (and val, mask), C)   constant mask, constant shift amount C
///   - (srl|sra (shl val, NN), MM)    constant NN and MM
/// For these the function computes a value, a start bit and a length, picks
/// one of the NVPTX::BFE_{S,U}{32,64}rii opcodes from the value's type, and
/// builds a machine node that reuses N's SDLoc and VT list.
///
/// NOTE(review): a number of short lines of this function (local
/// declarations, early exits, `else` lines, closing braces, the final return)
/// do not appear in this listing. The comments added below describe only the
/// statements that are visible; anything about the missing lines is marked as
/// an assumption to confirm.
4729 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// The two operands of N. Which of them is the value and which the constant
// is sorted out separately for each opcode below.
4730 SDValue LHS = N->getOperand(0);
4731 SDValue RHS = N->getOperand(1);
// NOTE(review): Len, Start and Val are assigned below but their declarations
// are not visible here.
4735 bool IsSigned = false;
// ---- Case 1: N is an AND -------------------------------------------------
4737 if (N->getOpcode() == ISD::AND) {
4738 // Canonicalize the operands
4739 // We want 'and %val, %mask'
4740 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4741 std::swap(LHS, RHS);
// dyn_cast yields null when RHS is not a ConstantSDNode.
4744 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4746 // We need a constant mask on the RHS of the AND
// NOTE(review): Mask is dereferenced right below; the null check that the
// comment above belongs to is not visible in this listing - confirm it exists.
4750 // Extract the mask bits
4751 uint64_t MaskVal = Mask->getZExtValue();
4752 if (!isMask_64(MaskVal)) {
4753 // We *could* handle shifted masks here, but doing so would require an
4754 // 'and' operation to fix up the low-order bits so we would trade
4755 // shr+and for bfe+and, which has the same throughput
4759 // How many bits are in our mask?
4760 uint64_t NumBits = countTrailingOnes(MaskVal);
// The field length is handed on as an i32 target constant.
4761 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4763 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4764 // We have a 'srl/and' pair, extract the effective start bit and length
// Operand 0 of the shift is the value being shifted, operand 1 the amount.
4765 Val = LHS.getNode()->getOperand(0);
4766 Start = LHS.getNode()->getOperand(1);
4767 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
// NOTE(review): StartConst is dereferenced on the next line; the condition
// guarding it is not visible here.
4769 uint64_t StartVal = StartConst->getZExtValue();
4770 // How many "good" bits do we have left? "good" is defined here as bits
4771 // that exist in the original value, not shifted in.
// NOTE(review): the width is taken from Start (the shift-amount operand), not
// from Val - confirm the two always have the same width. The subtraction is
// done in unsigned 64-bit arithmetic, so a StartVal larger than that width
// wraps around to a very large GoodBits instead of going negative.
4772 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4773 if (NumBits > GoodBits) {
4774 // Do not handle the case where bits have been shifted in. In theory
4775 // we could handle this, but the cost is likely higher than just
4776 // emitting the srl/and pair.
// Replace the original shift-amount operand by an i32 target constant.
4779 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4781 // Do not handle the case where the shift amount (can be zero if no srl
4782 // was found) is not constant. We could handle this case, but it would
4783 // require run-time logic that would be more expensive than just
4784 // emitting the srl/and pair.
4788 // Do not handle the case where the LHS of the and is not a shift. While
4789 // it would be trivial to handle this case, it would just transform
4790 // 'and' -> 'bfe', but 'and' has higher-throughput.
// ---- Case 2: N is a logical or arithmetic right shift ----------------------
4793 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
// Case 2a: the value being shifted is an AND.
4794 if (LHS->getOpcode() == ISD::AND) {
4795 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4797 // Shift amount must be constant
4801 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4803 SDValue AndLHS = LHS->getOperand(0);
4804 SDValue AndRHS = LHS->getOperand(1);
4806 // Canonicalize the AND to have the mask on the RHS
4807 if (isa<ConstantSDNode>(AndLHS)) {
4808 std::swap(AndLHS, AndRHS);
4811 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4813 // Mask must be constant
4817 uint64_t MaskVal = MaskCnst->getZExtValue();
// NOTE(review): NumZeros and NumBits are used below; their declarations (and
// any value NumZeros gets in the isMask_64 branch) are not visible here.
4820 if (isMask_64(MaskVal)) {
4822 // The number of bits in the result bitfield will be the number of
4823 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): no visible check ensures ShiftAmt does not exceed the number
// of trailing ones before this subtraction - confirm.
4824 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4825 } else if (isShiftedMask_64(MaskVal)) {
// Position of the lowest set bit, then the length of the run of ones that
// starts there.
4826 NumZeros = countTrailingZeros(MaskVal);
4827 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4828 // The number of bits in the result bitfield will be the number of
4829 // trailing zeros plus the number of set bits in the mask minus the
4830 // number of bits we shift off
4831 NumBits = NumZeros + NumOnes - ShiftAmt;
4833 // This is not a mask we can handle
4837 if (ShiftAmt < NumZeros) {
4838 // Handling this case would require extra logic that would make this
4839 // transformation non-profitable
// The field starts at the shift amount; both values become i32 target
// constants.
// NOTE(review): no assignment to Val is visible in this branch.
4844 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4845 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
// Case 2b: the value being shifted is a SHL.
4846 } else if (LHS->getOpcode() == ISD::SHL) {
4847 // Here, we have a pattern like:
4849 // (sra (shl val, NN), MM)
4851 // (srl (shl val, NN), MM)
4853 // If MM >= NN, we can efficiently optimize this with bfe
4854 Val = LHS->getOperand(0);
4856 SDValue ShlRHS = LHS->getOperand(1);
4857 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4859 // Shift amount must be constant
4862 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4864 SDValue ShrRHS = RHS;
4865 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4867 // Shift amount must be constant
4870 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4872 // To avoid extra codegen and be profitable, we need Outer >= Inner
4873 if (OuterShiftAmt < InnerShiftAmt) {
4877 // If the outer shift is more than the type size, we have no bitfield to
4878 // extract (since we also check that the inner shift is <= the outer shift
4879 // then this also implies that the inner shift is < the type size)
4880 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// NOTE(review): the left-hand sides of the next two expressions are on lines
// not visible here; judging by the AND branches they are presumably Start
// (outer minus inner shift) and Len (value width minus outer shift) - confirm.
4885 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4887 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4888 OuterShiftAmt, MVT::i32);
4890 if (N->getOpcode() == ISD::SRA) {
4891 // If we have an arithmetic right shift, we need to use the signed bfe variant
// NOTE(review): the statement inside this `if` is not visible; presumably it
// sets IsSigned - confirm.
4906 // Pick the BFE opcode from the type of Val (i32 or i64). Both signed (BFE_S*)
4907 // and unsigned (BFE_U*) opcodes are referenced below.
// NOTE(review): the condition choosing between the signed and unsigned opcode
// is not visible in this listing (presumably IsSigned - confirm). An earlier
// version of this comment said the unsigned variants are always used, which
// does not match the BFE_S* assignments below.
4908 if (Val.getValueType() == MVT::i32) {
4910 Opc = NVPTX::BFE_S32rii;
4912 Opc = NVPTX::BFE_U32rii;
4914 } else if (Val.getValueType() == MVT::i64) {
4916 Opc = NVPTX::BFE_S64rii;
4918 Opc = NVPTX::BFE_U64rii;
4921 // We cannot handle this type
// Build the BFE machine node at N's source location with N's result types.
// NOTE(review): Ops is assembled on lines not visible here (presumably from
// Val, Start and Len - confirm).
4930 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4935 // SelectDirectAddr - Match a direct address for DAG.
4936 // A direct address could be a globaladdress or externalsymbol.
//
// Three kinds of node are examined, in this order:
//   1. ISD::TargetGlobalAddress / ISD::TargetExternalSymbol,
//   2. NVPTXISD::Wrapper - operand 0 of the wrapper is stored to Address,
//   3. ISD::INTRINSIC_WO_CHAIN whose intrinsic id (operand 0) is
//      nvvm_ptr_gen_to_param and whose operand 1 is an NVPTXISD::MoveParam -
//      the function recurses on the MoveParam's operand 0.
// NOTE(review): what is stored to Address in case 1, and every return
// statement other than the recursive one, sit on lines that are not visible
// in this listing.
4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4938 // Return true if TGA or ES.
4939 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4940 N.getOpcode() == ISD::TargetExternalSymbol) {
4944 if (N.getOpcode() == NVPTXISD::Wrapper) {
// Look through the wrapper node to the address it wraps.
4945 Address = N.getOperand(0);
4948 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
// Operand 0 of the intrinsic node holds the intrinsic id as a constant.
4949 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4950 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4951 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
// Try again on whatever the MoveParam node carries.
4952 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// Shared implementation behind SelectADDRsi / SelectADDRsi64.
// Looks for an address of the form (add X, <constant>) where X is accepted by
// SelectDirectAddr. In that case Base holds what SelectDirectAddr wrote for X
// and Offset becomes the constant as a target constant of type mvt.
// OpNode is not referenced by the visible code.
// NOTE(review): the return statements are on lines not visible in this
// listing.
4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4959 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4960 if (Addr.getOpcode() == ISD::ADD) {
// Only the second operand of the add is checked for being a constant.
4961 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4962 SDValue base = Addr.getOperand(0);
// SelectDirectAddr writes its result straight into the caller's Base.
4963 if (SelectDirectAddr(base, Base)) {
4964 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// Forwards to SelectADDRsi_imp, unchanged arguments plus MVT::i32 as the type
// used for the offset constant.
4973 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4974 SDValue &Base, SDValue &Offset) {
4975 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// Forwards to SelectADDRsi_imp, unchanged arguments plus MVT::i64 as the type
// used for the offset constant.
4979 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4980 SDValue &Base, SDValue &Offset) {
4981 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Shared implementation behind SelectADDRri / SelectADDRri64.
// The visible code distinguishes these address shapes:
//   - a bare frame index: Base = target frame index, Offset = 0;
//   - a target external symbol / target global address: rejected;
//   - (add X, <constant>): Base is a target frame index when X is a frame
//     index, or X itself; Offset is the constant. Before that, X is run
//     through SelectDirectAddr.
// Base and Offset constants are created with type mvt. OpNode is not
// referenced by the visible code.
// NOTE(review): most return statements, and the `else` between the two Base
// assignments, are on lines not visible in this listing.
4985 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4986 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4987 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4988 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4989 Offset = CurDAG->getTargetConstant(0, mvt);
4992 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4993 Addr.getOpcode() == ISD::TargetGlobalAddress)
4994 return false; // direct calls.
4996 if (Addr.getOpcode() == ISD::ADD) {
// Addr is this function's own by-value parameter and is passed here as
// SelectDirectAddr's output, so anything written through it only changes the
// local copy.
// NOTE(review): the body of this `if` is not visible - confirm what happens
// when the first add operand is a direct address.
4997 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5000 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5001 if (FrameIndexSDNode *FIN =
5002 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5003 // Constant offset from frame ref.
5004 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// NOTE(review): presumably the `else` arm of the frame-index test above; the
// `else` line itself is not visible here.
5006 Base = Addr.getOperand(0);
5007 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// Forwards to SelectADDRri_imp, unchanged arguments plus MVT::i32 as the type
// used for the frame index and offset.
5015 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5016 SDValue &Base, SDValue &Offset) {
5017 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// Forwards to SelectADDRri_imp, unchanged arguments plus MVT::i64 as the type
// used for the frame index and offset.
5021 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5022 SDValue &Base, SDValue &Offset) {
5023 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Checks the address space of the memory accessed by N against spN.
// When N is a MemSDNode, the IR Value attached to its memory operand is
// fetched; if that Value has pointer type, the result is whether the pointer's
// address space equals spN. A memory operand that carries a pseudo value is
// singled out when spN is 0.
// NOTE(review): the outcome of the pseudo-value case, the null check on Src
// and the fall-through return are on lines not visible in this listing.
5026 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5027 unsigned int spN) const {
5028 const Value *Src = nullptr;
5029 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5030 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5032 Src = mN->getMemOperand()->getValue();
// NOTE(review): Src stays null when N is not a MemSDNode and is dereferenced
// below - confirm a guard exists on the lines missing from this listing.
5036 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5037 return (PT->getAddressSpace() == spN);
5041 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5042 /// inline asm expressions.
5043 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5044 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5046 switch (ConstraintID) {
5049 case InlineAsm::Constraint_m: // memory
5050 if (SelectDirectAddr(Op, Op0)) {
5051 OutOps.push_back(Op0);
5052 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5055 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5056 OutOps.push_back(Op0);
5057 OutOps.push_back(Op1);