1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line options that tune NVPTX f32 code generation.
// NOTE(review): parts of these declarations (any default values and the
// closing of each definition) are not visible in this view.
//
// nvptx-prec-divf32: integer level choosing the f32 divide flavor, as listed
// in the description string below.
// NOTE(review): the description reads "NVPTX Specifies" while the sibling
// options read "NVPTX Specific" -- likely a typo in the help text.
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
// nvptx-prec-sqrtf32: chooses between sqrt.approx (0) and sqrt.rn (1).
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// nvptx-f32ftz: requests flushing of f32 subnormals to sign-preserving zero.
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
44 /// createNVPTXISelDag - Factory for the pass that converts a legalized DAG
45 /// into a NVPTX-specific DAG, ready for instruction scheduling.
///
/// \param TM        Target machine forwarded to the NVPTXDAGToDAGISel ctor.
/// \param OptLevel  Optimization level forwarded to the NVPTXDAGToDAGISel ctor.
/// \returns a newly allocated NVPTXDAGToDAGISel as a raw FunctionPass pointer.
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Construct the NVPTX DAG-to-DAG selector: forwards the target machine and
/// optimization level to SelectionDAGISel and keeps a reference to the target
/// machine in TM.
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel), TM(tm) {
// doMulWide is set only when the optimization level is above zero.
// NOTE(review): presumably gates wide-multiply selection -- confirm at its uses.
54 doMulWide = (OptLevel > 0);
/// Per-function entry point: caches the function's subtarget, cast to
/// NVPTXSubtarget, in Subtarget and then defers to the base-class
/// SelectionDAGISel::runOnMachineFunction, whose result is returned.
57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59 return SelectionDAGISel::runOnMachineFunction(MF);
/// Returns the f32 division precision level to use.
/// An explicit nvptx-prec-divf32 on the command line takes priority; otherwise
/// the decision depends on TM.Options.UnsafeFPMath.
/// NOTE(review): the return statements are not visible in this view.
62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63 if (UsePrecDivF32.getNumOccurrences() > 0) {
64 // If nvptx-prec-divf32=N is used on the command-line, always honor it
67 // Otherwise, use div.approx if fast math is enabled
68 if (TM.Options.UnsafeFPMath)
/// Returns true when the precise f32 square root should be used.
/// An explicit nvptx-prec-sqrtf32 on the command line takes priority; otherwise
/// the result is the negation of TM.Options.UnsafeFPMath.
75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78 return UsePrecSqrtF32;
80 // Otherwise, stay precise unless fast math is enabled (then sqrt.approx)
81 return !TM.Options.UnsafeFPMath;
/// Decides whether f32 flush-to-zero is in effect for the current function.
/// An explicit nvptx-f32ftz on the command line takes priority; otherwise the
/// function's "nvptx-f32ftz" attribute is consulted and counts as enabled only
/// when its string value is exactly "true".
/// NOTE(review): the command-line return and the final fallback return are not
/// visible in this view.
85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
86 if (FtzEnabled.getNumOccurrences() > 0) {
87 // If nvptx-f32ftz is used on the command-line, always honor it
90 const Function *F = MF->getFunction();
91 // Otherwise, check for an nvptx-f32ftz attribute on the function
92 if (F->hasFnAttribute("nvptx-f32ftz"))
93 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
/// Returns whether FMA formation is allowed, by asking the subtarget's
/// NVPTXTargetLowering with the current machine function and OptLevel.
99 bool NVPTXDAGToDAGISel::allowFMA() const {
100 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
101 return TL->allowFMA(*MF, OptLevel);
104 /// Select - Main selection hook. Dispatches on the node's opcode to the
105 /// NVPTX-specific Select* helpers; the function ends by calling SelectCode(N).
///
/// Nodes that already carry a machine opcode are skipped (nullptr is returned).
/// NOTE(review): several case labels, the break statements and the check on
/// ResNode that precedes the final SelectCode call are not visible in this
/// view.
106 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
108 if (N->isMachineOpcode()) {
110 return nullptr; // Already selected.
113 SDNode *ResNode = nullptr;
114 switch (N->getOpcode()) {
116 ResNode = SelectLoad(N);
119 ResNode = SelectStore(N);
// Vector loads.
121 case NVPTXISD::LoadV2:
122 case NVPTXISD::LoadV4:
123 ResNode = SelectLoadVector(N);
// Vector LDG / LDU nodes.
125 case NVPTXISD::LDGV2:
126 case NVPTXISD::LDGV4:
127 case NVPTXISD::LDUV2:
128 case NVPTXISD::LDUV4:
129 ResNode = SelectLDGLDU(N);
// Vector stores.
131 case NVPTXISD::StoreV2:
132 case NVPTXISD::StoreV4:
133 ResNode = SelectStoreVector(N);
// Parameter loads.
135 case NVPTXISD::LoadParam:
136 case NVPTXISD::LoadParamV2:
137 case NVPTXISD::LoadParamV4:
138 ResNode = SelectLoadParam(N);
// Return-value stores.
140 case NVPTXISD::StoreRetval:
141 case NVPTXISD::StoreRetvalV2:
142 case NVPTXISD::StoreRetvalV4:
143 ResNode = SelectStoreRetval(N);
// Parameter stores.
145 case NVPTXISD::StoreParam:
146 case NVPTXISD::StoreParamV2:
147 case NVPTXISD::StoreParamV4:
148 case NVPTXISD::StoreParamS32:
149 case NVPTXISD::StoreParamU32:
150 ResNode = SelectStoreParam(N);
// Intrinsics, without and with a chain operand.
152 case ISD::INTRINSIC_WO_CHAIN:
153 ResNode = SelectIntrinsicNoChain(N);
155 case ISD::INTRINSIC_W_CHAIN:
156 ResNode = SelectIntrinsicChain(N);
// Texture and tld4 nodes (including the "Unified" variants) all go through
// SelectTextureIntrinsic.
158 case NVPTXISD::Tex1DFloatS32:
159 case NVPTXISD::Tex1DFloatFloat:
160 case NVPTXISD::Tex1DFloatFloatLevel:
161 case NVPTXISD::Tex1DFloatFloatGrad:
162 case NVPTXISD::Tex1DS32S32:
163 case NVPTXISD::Tex1DS32Float:
164 case NVPTXISD::Tex1DS32FloatLevel:
165 case NVPTXISD::Tex1DS32FloatGrad:
166 case NVPTXISD::Tex1DU32S32:
167 case NVPTXISD::Tex1DU32Float:
168 case NVPTXISD::Tex1DU32FloatLevel:
169 case NVPTXISD::Tex1DU32FloatGrad:
170 case NVPTXISD::Tex1DArrayFloatS32:
171 case NVPTXISD::Tex1DArrayFloatFloat:
172 case NVPTXISD::Tex1DArrayFloatFloatLevel:
173 case NVPTXISD::Tex1DArrayFloatFloatGrad:
174 case NVPTXISD::Tex1DArrayS32S32:
175 case NVPTXISD::Tex1DArrayS32Float:
176 case NVPTXISD::Tex1DArrayS32FloatLevel:
177 case NVPTXISD::Tex1DArrayS32FloatGrad:
178 case NVPTXISD::Tex1DArrayU32S32:
179 case NVPTXISD::Tex1DArrayU32Float:
180 case NVPTXISD::Tex1DArrayU32FloatLevel:
181 case NVPTXISD::Tex1DArrayU32FloatGrad:
182 case NVPTXISD::Tex2DFloatS32:
183 case NVPTXISD::Tex2DFloatFloat:
184 case NVPTXISD::Tex2DFloatFloatLevel:
185 case NVPTXISD::Tex2DFloatFloatGrad:
186 case NVPTXISD::Tex2DS32S32:
187 case NVPTXISD::Tex2DS32Float:
188 case NVPTXISD::Tex2DS32FloatLevel:
189 case NVPTXISD::Tex2DS32FloatGrad:
190 case NVPTXISD::Tex2DU32S32:
191 case NVPTXISD::Tex2DU32Float:
192 case NVPTXISD::Tex2DU32FloatLevel:
193 case NVPTXISD::Tex2DU32FloatGrad:
194 case NVPTXISD::Tex2DArrayFloatS32:
195 case NVPTXISD::Tex2DArrayFloatFloat:
196 case NVPTXISD::Tex2DArrayFloatFloatLevel:
197 case NVPTXISD::Tex2DArrayFloatFloatGrad:
198 case NVPTXISD::Tex2DArrayS32S32:
199 case NVPTXISD::Tex2DArrayS32Float:
200 case NVPTXISD::Tex2DArrayS32FloatLevel:
201 case NVPTXISD::Tex2DArrayS32FloatGrad:
202 case NVPTXISD::Tex2DArrayU32S32:
203 case NVPTXISD::Tex2DArrayU32Float:
204 case NVPTXISD::Tex2DArrayU32FloatLevel:
205 case NVPTXISD::Tex2DArrayU32FloatGrad:
206 case NVPTXISD::Tex3DFloatS32:
207 case NVPTXISD::Tex3DFloatFloat:
208 case NVPTXISD::Tex3DFloatFloatLevel:
209 case NVPTXISD::Tex3DFloatFloatGrad:
210 case NVPTXISD::Tex3DS32S32:
211 case NVPTXISD::Tex3DS32Float:
212 case NVPTXISD::Tex3DS32FloatLevel:
213 case NVPTXISD::Tex3DS32FloatGrad:
214 case NVPTXISD::Tex3DU32S32:
215 case NVPTXISD::Tex3DU32Float:
216 case NVPTXISD::Tex3DU32FloatLevel:
217 case NVPTXISD::Tex3DU32FloatGrad:
218 case NVPTXISD::TexCubeFloatFloat:
219 case NVPTXISD::TexCubeFloatFloatLevel:
220 case NVPTXISD::TexCubeS32Float:
221 case NVPTXISD::TexCubeS32FloatLevel:
222 case NVPTXISD::TexCubeU32Float:
223 case NVPTXISD::TexCubeU32FloatLevel:
224 case NVPTXISD::TexCubeArrayFloatFloat:
225 case NVPTXISD::TexCubeArrayFloatFloatLevel:
226 case NVPTXISD::TexCubeArrayS32Float:
227 case NVPTXISD::TexCubeArrayS32FloatLevel:
228 case NVPTXISD::TexCubeArrayU32Float:
229 case NVPTXISD::TexCubeArrayU32FloatLevel:
230 case NVPTXISD::Tld4R2DFloatFloat:
231 case NVPTXISD::Tld4G2DFloatFloat:
232 case NVPTXISD::Tld4B2DFloatFloat:
233 case NVPTXISD::Tld4A2DFloatFloat:
234 case NVPTXISD::Tld4R2DS64Float:
235 case NVPTXISD::Tld4G2DS64Float:
236 case NVPTXISD::Tld4B2DS64Float:
237 case NVPTXISD::Tld4A2DS64Float:
238 case NVPTXISD::Tld4R2DU64Float:
239 case NVPTXISD::Tld4G2DU64Float:
240 case NVPTXISD::Tld4B2DU64Float:
241 case NVPTXISD::Tld4A2DU64Float:
242 case NVPTXISD::TexUnified1DFloatS32:
243 case NVPTXISD::TexUnified1DFloatFloat:
244 case NVPTXISD::TexUnified1DFloatFloatLevel:
245 case NVPTXISD::TexUnified1DFloatFloatGrad:
246 case NVPTXISD::TexUnified1DS32S32:
247 case NVPTXISD::TexUnified1DS32Float:
248 case NVPTXISD::TexUnified1DS32FloatLevel:
249 case NVPTXISD::TexUnified1DS32FloatGrad:
250 case NVPTXISD::TexUnified1DU32S32:
251 case NVPTXISD::TexUnified1DU32Float:
252 case NVPTXISD::TexUnified1DU32FloatLevel:
253 case NVPTXISD::TexUnified1DU32FloatGrad:
254 case NVPTXISD::TexUnified1DArrayFloatS32:
255 case NVPTXISD::TexUnified1DArrayFloatFloat:
256 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
257 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
258 case NVPTXISD::TexUnified1DArrayS32S32:
259 case NVPTXISD::TexUnified1DArrayS32Float:
260 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
261 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
262 case NVPTXISD::TexUnified1DArrayU32S32:
263 case NVPTXISD::TexUnified1DArrayU32Float:
264 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
265 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
266 case NVPTXISD::TexUnified2DFloatS32:
267 case NVPTXISD::TexUnified2DFloatFloat:
268 case NVPTXISD::TexUnified2DFloatFloatLevel:
269 case NVPTXISD::TexUnified2DFloatFloatGrad:
270 case NVPTXISD::TexUnified2DS32S32:
271 case NVPTXISD::TexUnified2DS32Float:
272 case NVPTXISD::TexUnified2DS32FloatLevel:
273 case NVPTXISD::TexUnified2DS32FloatGrad:
274 case NVPTXISD::TexUnified2DU32S32:
275 case NVPTXISD::TexUnified2DU32Float:
276 case NVPTXISD::TexUnified2DU32FloatLevel:
277 case NVPTXISD::TexUnified2DU32FloatGrad:
278 case NVPTXISD::TexUnified2DArrayFloatS32:
279 case NVPTXISD::TexUnified2DArrayFloatFloat:
280 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
281 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
282 case NVPTXISD::TexUnified2DArrayS32S32:
283 case NVPTXISD::TexUnified2DArrayS32Float:
284 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
285 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
286 case NVPTXISD::TexUnified2DArrayU32S32:
287 case NVPTXISD::TexUnified2DArrayU32Float:
288 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
289 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
290 case NVPTXISD::TexUnified3DFloatS32:
291 case NVPTXISD::TexUnified3DFloatFloat:
292 case NVPTXISD::TexUnified3DFloatFloatLevel:
293 case NVPTXISD::TexUnified3DFloatFloatGrad:
294 case NVPTXISD::TexUnified3DS32S32:
295 case NVPTXISD::TexUnified3DS32Float:
296 case NVPTXISD::TexUnified3DS32FloatLevel:
297 case NVPTXISD::TexUnified3DS32FloatGrad:
298 case NVPTXISD::TexUnified3DU32S32:
299 case NVPTXISD::TexUnified3DU32Float:
300 case NVPTXISD::TexUnified3DU32FloatLevel:
301 case NVPTXISD::TexUnified3DU32FloatGrad:
302 case NVPTXISD::TexUnifiedCubeFloatFloat:
303 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
304 case NVPTXISD::TexUnifiedCubeS32Float:
305 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
306 case NVPTXISD::TexUnifiedCubeU32Float:
307 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayS32Float:
311 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayU32Float:
313 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
314 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
315 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
316 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedR2DS64Float:
319 case NVPTXISD::Tld4UnifiedG2DS64Float:
320 case NVPTXISD::Tld4UnifiedB2DS64Float:
321 case NVPTXISD::Tld4UnifiedA2DS64Float:
322 case NVPTXISD::Tld4UnifiedR2DU64Float:
323 case NVPTXISD::Tld4UnifiedG2DU64Float:
324 case NVPTXISD::Tld4UnifiedB2DU64Float:
325 case NVPTXISD::Tld4UnifiedA2DU64Float:
326 ResNode = SelectTextureIntrinsic(N);
// Surface-load nodes in their Clamp, Trap and Zero variants all go through
// SelectSurfaceIntrinsic.
328 case NVPTXISD::Suld1DI8Clamp:
329 case NVPTXISD::Suld1DI16Clamp:
330 case NVPTXISD::Suld1DI32Clamp:
331 case NVPTXISD::Suld1DI64Clamp:
332 case NVPTXISD::Suld1DV2I8Clamp:
333 case NVPTXISD::Suld1DV2I16Clamp:
334 case NVPTXISD::Suld1DV2I32Clamp:
335 case NVPTXISD::Suld1DV2I64Clamp:
336 case NVPTXISD::Suld1DV4I8Clamp:
337 case NVPTXISD::Suld1DV4I16Clamp:
338 case NVPTXISD::Suld1DV4I32Clamp:
339 case NVPTXISD::Suld1DArrayI8Clamp:
340 case NVPTXISD::Suld1DArrayI16Clamp:
341 case NVPTXISD::Suld1DArrayI32Clamp:
342 case NVPTXISD::Suld1DArrayI64Clamp:
343 case NVPTXISD::Suld1DArrayV2I8Clamp:
344 case NVPTXISD::Suld1DArrayV2I16Clamp:
345 case NVPTXISD::Suld1DArrayV2I32Clamp:
346 case NVPTXISD::Suld1DArrayV2I64Clamp:
347 case NVPTXISD::Suld1DArrayV4I8Clamp:
348 case NVPTXISD::Suld1DArrayV4I16Clamp:
349 case NVPTXISD::Suld1DArrayV4I32Clamp:
350 case NVPTXISD::Suld2DI8Clamp:
351 case NVPTXISD::Suld2DI16Clamp:
352 case NVPTXISD::Suld2DI32Clamp:
353 case NVPTXISD::Suld2DI64Clamp:
354 case NVPTXISD::Suld2DV2I8Clamp:
355 case NVPTXISD::Suld2DV2I16Clamp:
356 case NVPTXISD::Suld2DV2I32Clamp:
357 case NVPTXISD::Suld2DV2I64Clamp:
358 case NVPTXISD::Suld2DV4I8Clamp:
359 case NVPTXISD::Suld2DV4I16Clamp:
360 case NVPTXISD::Suld2DV4I32Clamp:
361 case NVPTXISD::Suld2DArrayI8Clamp:
362 case NVPTXISD::Suld2DArrayI16Clamp:
363 case NVPTXISD::Suld2DArrayI32Clamp:
364 case NVPTXISD::Suld2DArrayI64Clamp:
365 case NVPTXISD::Suld2DArrayV2I8Clamp:
366 case NVPTXISD::Suld2DArrayV2I16Clamp:
367 case NVPTXISD::Suld2DArrayV2I32Clamp:
368 case NVPTXISD::Suld2DArrayV2I64Clamp:
369 case NVPTXISD::Suld2DArrayV4I8Clamp:
370 case NVPTXISD::Suld2DArrayV4I16Clamp:
371 case NVPTXISD::Suld2DArrayV4I32Clamp:
372 case NVPTXISD::Suld3DI8Clamp:
373 case NVPTXISD::Suld3DI16Clamp:
374 case NVPTXISD::Suld3DI32Clamp:
375 case NVPTXISD::Suld3DI64Clamp:
376 case NVPTXISD::Suld3DV2I8Clamp:
377 case NVPTXISD::Suld3DV2I16Clamp:
378 case NVPTXISD::Suld3DV2I32Clamp:
379 case NVPTXISD::Suld3DV2I64Clamp:
380 case NVPTXISD::Suld3DV4I8Clamp:
381 case NVPTXISD::Suld3DV4I16Clamp:
382 case NVPTXISD::Suld3DV4I32Clamp:
383 case NVPTXISD::Suld1DI8Trap:
384 case NVPTXISD::Suld1DI16Trap:
385 case NVPTXISD::Suld1DI32Trap:
386 case NVPTXISD::Suld1DI64Trap:
387 case NVPTXISD::Suld1DV2I8Trap:
388 case NVPTXISD::Suld1DV2I16Trap:
389 case NVPTXISD::Suld1DV2I32Trap:
390 case NVPTXISD::Suld1DV2I64Trap:
391 case NVPTXISD::Suld1DV4I8Trap:
392 case NVPTXISD::Suld1DV4I16Trap:
393 case NVPTXISD::Suld1DV4I32Trap:
394 case NVPTXISD::Suld1DArrayI8Trap:
395 case NVPTXISD::Suld1DArrayI16Trap:
396 case NVPTXISD::Suld1DArrayI32Trap:
397 case NVPTXISD::Suld1DArrayI64Trap:
398 case NVPTXISD::Suld1DArrayV2I8Trap:
399 case NVPTXISD::Suld1DArrayV2I16Trap:
400 case NVPTXISD::Suld1DArrayV2I32Trap:
401 case NVPTXISD::Suld1DArrayV2I64Trap:
402 case NVPTXISD::Suld1DArrayV4I8Trap:
403 case NVPTXISD::Suld1DArrayV4I16Trap:
404 case NVPTXISD::Suld1DArrayV4I32Trap:
405 case NVPTXISD::Suld2DI8Trap:
406 case NVPTXISD::Suld2DI16Trap:
407 case NVPTXISD::Suld2DI32Trap:
408 case NVPTXISD::Suld2DI64Trap:
409 case NVPTXISD::Suld2DV2I8Trap:
410 case NVPTXISD::Suld2DV2I16Trap:
411 case NVPTXISD::Suld2DV2I32Trap:
412 case NVPTXISD::Suld2DV2I64Trap:
413 case NVPTXISD::Suld2DV4I8Trap:
414 case NVPTXISD::Suld2DV4I16Trap:
415 case NVPTXISD::Suld2DV4I32Trap:
416 case NVPTXISD::Suld2DArrayI8Trap:
417 case NVPTXISD::Suld2DArrayI16Trap:
418 case NVPTXISD::Suld2DArrayI32Trap:
419 case NVPTXISD::Suld2DArrayI64Trap:
420 case NVPTXISD::Suld2DArrayV2I8Trap:
421 case NVPTXISD::Suld2DArrayV2I16Trap:
422 case NVPTXISD::Suld2DArrayV2I32Trap:
423 case NVPTXISD::Suld2DArrayV2I64Trap:
424 case NVPTXISD::Suld2DArrayV4I8Trap:
425 case NVPTXISD::Suld2DArrayV4I16Trap:
426 case NVPTXISD::Suld2DArrayV4I32Trap:
427 case NVPTXISD::Suld3DI8Trap:
428 case NVPTXISD::Suld3DI16Trap:
429 case NVPTXISD::Suld3DI32Trap:
430 case NVPTXISD::Suld3DI64Trap:
431 case NVPTXISD::Suld3DV2I8Trap:
432 case NVPTXISD::Suld3DV2I16Trap:
433 case NVPTXISD::Suld3DV2I32Trap:
434 case NVPTXISD::Suld3DV2I64Trap:
435 case NVPTXISD::Suld3DV4I8Trap:
436 case NVPTXISD::Suld3DV4I16Trap:
437 case NVPTXISD::Suld3DV4I32Trap:
438 case NVPTXISD::Suld1DI8Zero:
439 case NVPTXISD::Suld1DI16Zero:
440 case NVPTXISD::Suld1DI32Zero:
441 case NVPTXISD::Suld1DI64Zero:
442 case NVPTXISD::Suld1DV2I8Zero:
443 case NVPTXISD::Suld1DV2I16Zero:
444 case NVPTXISD::Suld1DV2I32Zero:
445 case NVPTXISD::Suld1DV2I64Zero:
446 case NVPTXISD::Suld1DV4I8Zero:
447 case NVPTXISD::Suld1DV4I16Zero:
448 case NVPTXISD::Suld1DV4I32Zero:
449 case NVPTXISD::Suld1DArrayI8Zero:
450 case NVPTXISD::Suld1DArrayI16Zero:
451 case NVPTXISD::Suld1DArrayI32Zero:
452 case NVPTXISD::Suld1DArrayI64Zero:
453 case NVPTXISD::Suld1DArrayV2I8Zero:
454 case NVPTXISD::Suld1DArrayV2I16Zero:
455 case NVPTXISD::Suld1DArrayV2I32Zero:
456 case NVPTXISD::Suld1DArrayV2I64Zero:
457 case NVPTXISD::Suld1DArrayV4I8Zero:
458 case NVPTXISD::Suld1DArrayV4I16Zero:
459 case NVPTXISD::Suld1DArrayV4I32Zero:
460 case NVPTXISD::Suld2DI8Zero:
461 case NVPTXISD::Suld2DI16Zero:
462 case NVPTXISD::Suld2DI32Zero:
463 case NVPTXISD::Suld2DI64Zero:
464 case NVPTXISD::Suld2DV2I8Zero:
465 case NVPTXISD::Suld2DV2I16Zero:
466 case NVPTXISD::Suld2DV2I32Zero:
467 case NVPTXISD::Suld2DV2I64Zero:
468 case NVPTXISD::Suld2DV4I8Zero:
469 case NVPTXISD::Suld2DV4I16Zero:
470 case NVPTXISD::Suld2DV4I32Zero:
471 case NVPTXISD::Suld2DArrayI8Zero:
472 case NVPTXISD::Suld2DArrayI16Zero:
473 case NVPTXISD::Suld2DArrayI32Zero:
474 case NVPTXISD::Suld2DArrayI64Zero:
475 case NVPTXISD::Suld2DArrayV2I8Zero:
476 case NVPTXISD::Suld2DArrayV2I16Zero:
477 case NVPTXISD::Suld2DArrayV2I32Zero:
478 case NVPTXISD::Suld2DArrayV2I64Zero:
479 case NVPTXISD::Suld2DArrayV4I8Zero:
480 case NVPTXISD::Suld2DArrayV4I16Zero:
481 case NVPTXISD::Suld2DArrayV4I32Zero:
482 case NVPTXISD::Suld3DI8Zero:
483 case NVPTXISD::Suld3DI16Zero:
484 case NVPTXISD::Suld3DI32Zero:
485 case NVPTXISD::Suld3DI64Zero:
486 case NVPTXISD::Suld3DV2I8Zero:
487 case NVPTXISD::Suld3DV2I16Zero:
488 case NVPTXISD::Suld3DV2I32Zero:
489 case NVPTXISD::Suld3DV2I64Zero:
490 case NVPTXISD::Suld3DV4I8Zero:
491 case NVPTXISD::Suld3DV4I16Zero:
492 case NVPTXISD::Suld3DV4I32Zero:
493 ResNode = SelectSurfaceIntrinsic(N);
499 ResNode = SelectBFE(N);
// Address-space casts.
501 case ISD::ADDRSPACECAST:
502 ResNode = SelectAddrSpaceCast(N);
// Hand the node to SelectCode.
509 return SelectCode(N);
/// Custom selection for chained intrinsic nodes. The intrinsic ID is read from
/// operand 1; the nvvm ldg/ldu global intrinsics listed below are handed to
/// SelectLDGLDU.
/// NOTE(review): the switch header and its default path are not visible in
/// this view.
512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
513 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
517 case Intrinsic::nvvm_ldg_global_f:
518 case Intrinsic::nvvm_ldg_global_i:
519 case Intrinsic::nvvm_ldg_global_p:
520 case Intrinsic::nvvm_ldu_global_f:
521 case Intrinsic::nvvm_ldu_global_i:
522 case Intrinsic::nvvm_ldu_global_p:
523 return SelectLDGLDU(N);
/// Maps the address space of a memory node's underlying IR value to the
/// matching NVPTX::PTXLdStInstCode address-space code.
///
/// The IR value is taken from the node's memory operand. When its type is a
/// pointer type, the pointer's address space selects the code; the remaining
/// visible return paths yield GENERIC.
/// NOTE(review): the condition guarding the first GENERIC return is not
/// visible in this view (presumably a null check on Src).
527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
528 const Value *Src = N->getMemOperand()->getValue();
531 return NVPTX::PTXLdStInstCode::GENERIC;
533 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
534 switch (PT->getAddressSpace()) {
535 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
536 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
537 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
538 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
539 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
540 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fallback: treat the access as generic.
544 return NVPTX::PTXLdStInstCode::GENERIC;
/// Custom selection for chain-less intrinsic nodes. The intrinsic ID is read
/// from operand 0; nvvm_texsurf_handle_internal is handed to
/// SelectTexSurfHandle.
/// NOTE(review): the switch header and its default path are not visible in
/// this view.
547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
548 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
552 case Intrinsic::nvvm_texsurf_handle_internal:
553 return SelectTexSurfHandle(N);
/// Selects a texsurf_handles machine node (result type i64) for a
/// texture/surface handle intrinsic.
/// NOTE(review): the trailing arguments of the getMachineNode call are not
/// visible in this view.
557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
558 // Op 0 is the intrinsic ID
// Operand 1 is a wrapper node; its operand 0 is the value named GlobalVal.
559 SDValue Wrapper = N->getOperand(1);
560 SDValue GlobalVal = Wrapper.getOperand(0);
561 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
/// Selects a cvta-family machine node for an ADDRSPACECAST.
///
/// Casts into the generic address space use the cvta_<space>_yes opcodes;
/// casts out of it use the cvta_to_<space>_yes opcodes. The _64 opcode
/// variants are chosen when TM.is64Bit() is true. Unsupported address spaces,
/// and casts between two non-generic spaces, end in report_fatal_error.
/// NOTE(review): the Opc declarations and the break statements are not visible
/// in this view.
565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
566 SDValue Src = N->getOperand(0);
567 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
568 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
569 unsigned DstAddrSpace = CastN->getDestAddressSpace();
571 assert(SrcAddrSpace != DstAddrSpace &&
572 "addrspacecast must be between different address spaces");
574 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
575 // Specific to generic
577 switch (SrcAddrSpace) {
578 default: report_fatal_error("Bad address space in addrspacecast");
579 case ADDRESS_SPACE_GLOBAL:
580 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
582 case ADDRESS_SPACE_SHARED:
583 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
585 case ADDRESS_SPACE_CONST:
586 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
588 case ADDRESS_SPACE_LOCAL:
589 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
592 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
594 // Generic to specific
// NOTE(review): the source space is compared against the literal 0 here rather
// than ADDRESS_SPACE_GENERIC -- presumably the same value; confirm.
595 if (SrcAddrSpace != 0)
596 report_fatal_error("Cannot cast between two non-generic address spaces");
598 switch (DstAddrSpace) {
599 default: report_fatal_error("Bad address space in addrspacecast");
600 case ADDRESS_SPACE_GLOBAL:
601 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
602 : NVPTX::cvta_to_global_yes;
604 case ADDRESS_SPACE_SHARED:
605 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
606 : NVPTX::cvta_to_shared_yes;
608 case ADDRESS_SPACE_CONST:
610 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
612 case ADDRESS_SPACE_LOCAL:
614 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
617 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
/// Selects an NVPTX LD_* machine node for a LoadSDNode.
///
/// The machine node's leading immediate operands are, in order: the volatile
/// flag, the address-space code, the vector-type code, the source-type code
/// and the source-type width. The opcode is chosen by addressing form, tried
/// in this order: SelectDirectAddr (_avar opcodes), SelectADDRsi[64] (_asi),
/// SelectADDRri[64] (_ari / _ari_64), and otherwise the _areg / _areg_64
/// opcodes using the address operand directly. Within each form the opcode is
/// picked per value type (i8/i16/i32/i64/f32/f64 variants). Finally the
/// original memory operand is attached to the new node.
/// NOTE(review): the switch headers, case labels, early returns and the final
/// return are not visible in this view.
621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
623 LoadSDNode *LD = cast<LoadSDNode>(N);
624 EVT LoadedVT = LD->getMemoryVT();
625 SDNode *NVPTXLD = nullptr;
627 // do not support pre/post inc/dec
631 if (!LoadedVT.isSimple())
634 // Address Space Setting
635 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
638 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also names GENERIC in addition to GLOBAL and
// SHARED; the statement it guards is not visible in this view.
639 bool isVolatile = LD->isVolatile();
640 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
641 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
642 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector type setting: Scalar by default, V2 or V4 for vector memory types.
646 MVT SimpleVT = LoadedVT.getSimpleVT();
647 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
648 if (SimpleVT.isVector()) {
649 unsigned num = SimpleVT.getVectorNumElements();
651 vecType = NVPTX::PTXLdStInstCode::V2;
653 vecType = NVPTX::PTXLdStInstCode::V4;
658 // Type Setting: fromType + fromTypeWidth
660 // Sign : ISD::SEXTLOAD
661 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
663 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
664 MVT ScalarVT = SimpleVT.getScalarType();
665 // Read at least 8 bits (predicates are stored as 8-bit values)
666 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
667 unsigned int fromType;
668 if ((LD->getExtensionType() == ISD::SEXTLOAD))
669 fromType = NVPTX::PTXLdStInstCode::Signed;
670 else if (ScalarVT.isFloatingPoint())
671 fromType = NVPTX::PTXLdStInstCode::Float;
673 fromType = NVPTX::PTXLdStInstCode::Unsigned;
675 // Create the machine instruction DAG
676 SDValue Chain = N->getOperand(0);
677 SDValue N1 = N->getOperand(1);
679 SDValue Offset, Base;
681 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address (_avar opcodes).
683 if (SelectDirectAddr(N1, Addr)) {
686 Opcode = NVPTX::LD_i8_avar;
689 Opcode = NVPTX::LD_i16_avar;
692 Opcode = NVPTX::LD_i32_avar;
695 Opcode = NVPTX::LD_i64_avar;
698 Opcode = NVPTX::LD_f32_avar;
701 Opcode = NVPTX::LD_f64_avar;
706 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
707 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
708 getI32Imm(fromTypeWidth, dl), Addr, Chain };
709 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: base + offset matched by SelectADDRsi / SelectADDRsi64 (_asi opcodes).
710 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
711 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
714 Opcode = NVPTX::LD_i8_asi;
717 Opcode = NVPTX::LD_i16_asi;
720 Opcode = NVPTX::LD_i32_asi;
723 Opcode = NVPTX::LD_i64_asi;
726 Opcode = NVPTX::LD_f32_asi;
729 Opcode = NVPTX::LD_f64_asi;
734 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
735 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
736 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
737 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: base + offset matched by SelectADDRri / SelectADDRri64
// (_ari_64 and _ari opcodes).
738 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
739 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
743 Opcode = NVPTX::LD_i8_ari_64;
746 Opcode = NVPTX::LD_i16_ari_64;
749 Opcode = NVPTX::LD_i32_ari_64;
752 Opcode = NVPTX::LD_i64_ari_64;
755 Opcode = NVPTX::LD_f32_ari_64;
758 Opcode = NVPTX::LD_f64_ari_64;
766 Opcode = NVPTX::LD_i8_ari;
769 Opcode = NVPTX::LD_i16_ari;
772 Opcode = NVPTX::LD_i32_ari;
775 Opcode = NVPTX::LD_i64_ari;
778 Opcode = NVPTX::LD_f32_ari;
781 Opcode = NVPTX::LD_f64_ari;
787 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
788 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
789 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
790 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4: fallback, the address operand N1 is used as-is
// (_areg_64 and _areg opcodes).
795 Opcode = NVPTX::LD_i8_areg_64;
798 Opcode = NVPTX::LD_i16_areg_64;
801 Opcode = NVPTX::LD_i32_areg_64;
804 Opcode = NVPTX::LD_i64_areg_64;
807 Opcode = NVPTX::LD_f32_areg_64;
810 Opcode = NVPTX::LD_f64_areg_64;
818 Opcode = NVPTX::LD_i8_areg;
821 Opcode = NVPTX::LD_i16_areg;
824 Opcode = NVPTX::LD_i32_areg;
827 Opcode = NVPTX::LD_i64_areg;
830 Opcode = NVPTX::LD_f32_areg;
833 Opcode = NVPTX::LD_f64_areg;
839 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
840 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
841 getI32Imm(fromTypeWidth, dl), N1, Chain };
842 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Carry the original node's memory operand over to the selected machine node.
846 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
847 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
848 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Selects an NVPTX LDV_* machine node for NVPTXISD::LoadV2 / LoadV4 nodes.
///
/// The structure mirrors SelectLoad: the leading immediate operands are the
/// volatile flag, address-space code, vector-type code, source-type code and
/// source-type width, and the opcode is chosen by addressing form
/// (SelectDirectAddr -> _avar, SelectADDRsi[64] -> _asi, SelectADDRri[64] ->
/// _ari / _ari_64, otherwise _areg / _areg_64), by v2 versus v4, and by the
/// type of result 0. The extension type comes from the node's last operand.
/// The visible v4 opcode lists contain i8/i16/i32/f32 entries only.
/// NOTE(review): the case labels, early returns, some declarations and the
/// final return are not visible in this view.
854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
856 SDValue Chain = N->getOperand(0);
857 SDValue Op1 = N->getOperand(1);
858 SDValue Addr, Offset, Base;
862 MemSDNode *MemSD = cast<MemSDNode>(N);
863 EVT LoadedVT = MemSD->getMemoryVT();
865 if (!LoadedVT.isSimple())
868 // Address Space Setting
869 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
872 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also names GENERIC in addition to GLOBAL and
// SHARED; the statement it guards is not visible in this view.
873 bool IsVolatile = MemSD->isVolatile();
874 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
880 MVT SimpleVT = LoadedVT.getSimpleVT();
882 // Type Setting: fromType + fromTypeWidth
884 // Sign : ISD::SEXTLOAD
885 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
887 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
888 MVT ScalarVT = SimpleVT.getScalarType();
889 // Read at least 8 bits (predicates are stored as 8-bit values)
890 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
891 unsigned int FromType;
892 // The last operand holds the original LoadSDNode::getExtensionType() value
893 unsigned ExtensionType = cast<ConstantSDNode>(
894 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
895 if (ExtensionType == ISD::SEXTLOAD)
896 FromType = NVPTX::PTXLdStInstCode::Signed;
897 else if (ScalarVT.isFloatingPoint())
898 FromType = NVPTX::PTXLdStInstCode::Float;
900 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector type code follows the node opcode: LoadV2 -> V2, LoadV4 -> V4.
904 switch (N->getOpcode()) {
905 case NVPTXISD::LoadV2:
906 VecType = NVPTX::PTXLdStInstCode::V2;
908 case NVPTXISD::LoadV4:
909 VecType = NVPTX::PTXLdStInstCode::V4;
// The opcode tables below are keyed on the type of result 0.
915 EVT EltVT = N->getValueType(0);
// Form 1: direct address (_avar opcodes).
917 if (SelectDirectAddr(Op1, Addr)) {
918 switch (N->getOpcode()) {
921 case NVPTXISD::LoadV2:
922 switch (EltVT.getSimpleVT().SimpleTy) {
926 Opcode = NVPTX::LDV_i8_v2_avar;
929 Opcode = NVPTX::LDV_i16_v2_avar;
932 Opcode = NVPTX::LDV_i32_v2_avar;
935 Opcode = NVPTX::LDV_i64_v2_avar;
938 Opcode = NVPTX::LDV_f32_v2_avar;
941 Opcode = NVPTX::LDV_f64_v2_avar;
945 case NVPTXISD::LoadV4:
946 switch (EltVT.getSimpleVT().SimpleTy) {
950 Opcode = NVPTX::LDV_i8_v4_avar;
953 Opcode = NVPTX::LDV_i16_v4_avar;
956 Opcode = NVPTX::LDV_i32_v4_avar;
959 Opcode = NVPTX::LDV_f32_v4_avar;
965 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
966 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
967 getI32Imm(FromTypeWidth, DL), Addr, Chain };
968 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: base + offset matched by SelectADDRsi / SelectADDRsi64 (_asi opcodes).
969 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
970 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
971 switch (N->getOpcode()) {
974 case NVPTXISD::LoadV2:
975 switch (EltVT.getSimpleVT().SimpleTy) {
979 Opcode = NVPTX::LDV_i8_v2_asi;
982 Opcode = NVPTX::LDV_i16_v2_asi;
985 Opcode = NVPTX::LDV_i32_v2_asi;
988 Opcode = NVPTX::LDV_i64_v2_asi;
991 Opcode = NVPTX::LDV_f32_v2_asi;
994 Opcode = NVPTX::LDV_f64_v2_asi;
998 case NVPTXISD::LoadV4:
999 switch (EltVT.getSimpleVT().SimpleTy) {
1003 Opcode = NVPTX::LDV_i8_v4_asi;
1006 Opcode = NVPTX::LDV_i16_v4_asi;
1009 Opcode = NVPTX::LDV_i32_v4_asi;
1012 Opcode = NVPTX::LDV_f32_v4_asi;
1018 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1019 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1020 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1021 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: base + offset matched by SelectADDRri / SelectADDRri64
// (_ari_64 and _ari opcodes).
1022 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1023 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1025 switch (N->getOpcode()) {
1028 case NVPTXISD::LoadV2:
1029 switch (EltVT.getSimpleVT().SimpleTy) {
1033 Opcode = NVPTX::LDV_i8_v2_ari_64;
1036 Opcode = NVPTX::LDV_i16_v2_ari_64;
1039 Opcode = NVPTX::LDV_i32_v2_ari_64;
1042 Opcode = NVPTX::LDV_i64_v2_ari_64;
1045 Opcode = NVPTX::LDV_f32_v2_ari_64;
1048 Opcode = NVPTX::LDV_f64_v2_ari_64;
1052 case NVPTXISD::LoadV4:
1053 switch (EltVT.getSimpleVT().SimpleTy) {
1057 Opcode = NVPTX::LDV_i8_v4_ari_64;
1060 Opcode = NVPTX::LDV_i16_v4_ari_64;
1063 Opcode = NVPTX::LDV_i32_v4_ari_64;
1066 Opcode = NVPTX::LDV_f32_v4_ari_64;
1072 switch (N->getOpcode()) {
1075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
1080 Opcode = NVPTX::LDV_i8_v2_ari;
1083 Opcode = NVPTX::LDV_i16_v2_ari;
1086 Opcode = NVPTX::LDV_i32_v2_ari;
1089 Opcode = NVPTX::LDV_i64_v2_ari;
1092 Opcode = NVPTX::LDV_f32_v2_ari;
1095 Opcode = NVPTX::LDV_f64_v2_ari;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
1104 Opcode = NVPTX::LDV_i8_v4_ari;
1107 Opcode = NVPTX::LDV_i16_v4_ari;
1110 Opcode = NVPTX::LDV_i32_v4_ari;
1113 Opcode = NVPTX::LDV_f32_v4_ari;
1120 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1121 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1122 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1124 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4: fallback, the address operand Op1 is used as-is
// (_areg_64 and _areg opcodes).
1127 switch (N->getOpcode()) {
1130 case NVPTXISD::LoadV2:
1131 switch (EltVT.getSimpleVT().SimpleTy) {
1135 Opcode = NVPTX::LDV_i8_v2_areg_64;
1138 Opcode = NVPTX::LDV_i16_v2_areg_64;
1141 Opcode = NVPTX::LDV_i32_v2_areg_64;
1144 Opcode = NVPTX::LDV_i64_v2_areg_64;
1147 Opcode = NVPTX::LDV_f32_v2_areg_64;
1150 Opcode = NVPTX::LDV_f64_v2_areg_64;
1154 case NVPTXISD::LoadV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1159 Opcode = NVPTX::LDV_i8_v4_areg_64;
1162 Opcode = NVPTX::LDV_i16_v4_areg_64;
1165 Opcode = NVPTX::LDV_i32_v4_areg_64;
1168 Opcode = NVPTX::LDV_f32_v4_areg_64;
1174 switch (N->getOpcode()) {
1177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
1182 Opcode = NVPTX::LDV_i8_v2_areg;
1185 Opcode = NVPTX::LDV_i16_v2_areg;
1188 Opcode = NVPTX::LDV_i32_v2_areg;
1191 Opcode = NVPTX::LDV_i64_v2_areg;
1194 Opcode = NVPTX::LDV_f32_v2_areg;
1197 Opcode = NVPTX::LDV_f64_v2_areg;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
1206 Opcode = NVPTX::LDV_i8_v4_areg;
1209 Opcode = NVPTX::LDV_i16_v4_areg;
1212 Opcode = NVPTX::LDV_i32_v4_areg;
1215 Opcode = NVPTX::LDV_f32_v4_areg;
1222 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1223 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1224 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1225 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the selected machine node.
1228 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1229 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1230 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select the machine instruction for an LDG/LDU load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_* / nvvm_ldu_global_* intrinsics, or one of the
/// NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4 vector nodes. The opcode is picked
/// from the node kind, the element type of the memory VT, and the addressing
/// form that matches the address operand:
///   - direct address  -> *avar opcodes, operands { Addr, Chain }
///   - base + offset   -> *ari* opcodes, operands { Base, Offset, Chain }
///   - anything else   -> *areg* opcodes, operands { Op1, Chain }
/// The memory operand of the original node is attached to the new machine node.
///
/// NOTE(review): the 64-bit and 32-bit opcode variants (ari64/ari, areg64/areg)
/// are presumably chosen on TM.is64Bit() -- confirm.
1235 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1237 SDValue Chain = N->getOperand(0);
1242 // If this is an LDG intrinsic, the address is the third operand. If it's an
1243 // LDG/LDU SD node (from custom vector handling), then it's the second operand.
1244 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1245 Op1 = N->getOperand(2);
1246 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an intrinsic node holds the intrinsic ID.
1247 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1251 case Intrinsic::nvvm_ldg_global_f:
1252 case Intrinsic::nvvm_ldg_global_i:
1253 case Intrinsic::nvvm_ldg_global_p:
1256 case Intrinsic::nvvm_ldu_global_f:
1257 case Intrinsic::nvvm_ldu_global_i:
1258 case Intrinsic::nvvm_ldu_global_p:
1263 Op1 = N->getOperand(1);
1264 Mem = cast<MemSDNode>(N);
1270 SDValue Base, Offset, Addr;
// Opcode selection is keyed on the element type, so strip vector types down
// to their element type.
1272 EVT EltVT = Mem->getMemoryVT();
1273 if (EltVT.isVector()) {
1274 EltVT = EltVT.getVectorElementType();
// Addressing form 1: direct address (avar opcodes).
1277 if (SelectDirectAddr(Op1, Addr)) {
1278 switch (N->getOpcode()) {
1281 case ISD::INTRINSIC_W_CHAIN:
1283 switch (EltVT.getSimpleVT().SimpleTy) {
1287 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1290 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1293 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1296 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1299 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1302 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1306 switch (EltVT.getSimpleVT().SimpleTy) {
1310 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1313 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1316 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1319 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1322 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1325 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1330 case NVPTXISD::LDGV2:
1331 switch (EltVT.getSimpleVT().SimpleTy) {
1335 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1338 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1341 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1344 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1347 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1350 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1354 case NVPTXISD::LDUV2:
1355 switch (EltVT.getSimpleVT().SimpleTy) {
1359 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1362 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1365 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1368 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1371 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1374 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1378 case NVPTXISD::LDGV4:
1379 switch (EltVT.getSimpleVT().SimpleTy) {
1383 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1386 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1389 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1392 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1396 case NVPTXISD::LDUV4:
1397 switch (EltVT.getSimpleVT().SimpleTy) {
1401 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1404 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1407 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1410 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1416 SDValue Ops[] = { Addr, Chain };
1417 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing form 2: base register + immediate offset (ari opcodes).
1418 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1419 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1421 switch (N->getOpcode()) {
1424 case ISD::INTRINSIC_W_CHAIN:
1426 switch (EltVT.getSimpleVT().SimpleTy) {
1430 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1433 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1436 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1439 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1442 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1445 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1449 switch (EltVT.getSimpleVT().SimpleTy) {
1453 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1456 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1459 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1462 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1465 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1468 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1473 case NVPTXISD::LDGV2:
1474 switch (EltVT.getSimpleVT().SimpleTy) {
1478 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1481 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1484 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1487 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1490 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1493 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1497 case NVPTXISD::LDUV2:
1498 switch (EltVT.getSimpleVT().SimpleTy) {
1502 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1505 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1508 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1511 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1514 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1517 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1521 case NVPTXISD::LDGV4:
1522 switch (EltVT.getSimpleVT().SimpleTy) {
1526 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1529 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1532 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1535 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1539 case NVPTXISD::LDUV4:
1540 switch (EltVT.getSimpleVT().SimpleTy) {
1544 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1547 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1550 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1553 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1559 switch (N->getOpcode()) {
1562 case ISD::INTRINSIC_W_CHAIN:
1564 switch (EltVT.getSimpleVT().SimpleTy) {
1568 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1571 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1574 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1577 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1580 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1583 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1587 switch (EltVT.getSimpleVT().SimpleTy) {
1591 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1594 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1597 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1600 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1603 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1606 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1611 case NVPTXISD::LDGV2:
1612 switch (EltVT.getSimpleVT().SimpleTy) {
1616 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1619 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1622 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1625 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1628 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1631 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1635 case NVPTXISD::LDUV2:
1636 switch (EltVT.getSimpleVT().SimpleTy) {
1640 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1643 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1646 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1649 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1652 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1655 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1659 case NVPTXISD::LDGV4:
1660 switch (EltVT.getSimpleVT().SimpleTy) {
1664 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1667 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1670 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1673 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1677 case NVPTXISD::LDUV4:
1678 switch (EltVT.getSimpleVT().SimpleTy) {
1682 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1685 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1688 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1691 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1698 SDValue Ops[] = { Base, Offset, Chain };
1700 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing form 3: the raw address operand is used as-is (areg opcodes).
1703 switch (N->getOpcode()) {
1706 case ISD::INTRINSIC_W_CHAIN:
1708 switch (EltVT.getSimpleVT().SimpleTy) {
1712 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1715 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1718 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1721 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1724 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1727 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1731 switch (EltVT.getSimpleVT().SimpleTy) {
1735 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1738 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1741 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1744 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1747 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1750 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1755 case NVPTXISD::LDGV2:
1756 switch (EltVT.getSimpleVT().SimpleTy) {
1760 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1763 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1766 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1769 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1772 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1775 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1779 case NVPTXISD::LDUV2:
1780 switch (EltVT.getSimpleVT().SimpleTy) {
1784 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1787 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1790 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1793 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1796 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1799 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1803 case NVPTXISD::LDGV4:
1804 switch (EltVT.getSimpleVT().SimpleTy) {
1808 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1811 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1814 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1817 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1821 case NVPTXISD::LDUV4:
1822 switch (EltVT.getSimpleVT().SimpleTy) {
1826 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1829 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1832 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1835 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1841 switch (N->getOpcode()) {
1844 case ISD::INTRINSIC_W_CHAIN:
1846 switch (EltVT.getSimpleVT().SimpleTy) {
1850 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1853 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1856 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1859 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1862 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1869 switch (EltVT.getSimpleVT().SimpleTy) {
1873 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1876 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1879 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1882 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1885 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1893 case NVPTXISD::LDGV2:
1894 switch (EltVT.getSimpleVT().SimpleTy) {
1898 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1901 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1904 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1907 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1910 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1913 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1917 case NVPTXISD::LDUV2:
1918 switch (EltVT.getSimpleVT().SimpleTy) {
1922 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1925 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1928 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1931 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1934 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1937 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1941 case NVPTXISD::LDGV4:
1942 switch (EltVT.getSimpleVT().SimpleTy) {
1946 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1949 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1952 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1955 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1959 case NVPTXISD::LDUV4:
1960 switch (EltVT.getSimpleVT().SimpleTy) {
1964 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1967 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1970 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1973 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1980 SDValue Ops[] = { Op1, Chain };
1981 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the selected machine node.
1984 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1985 MemRefs0[0] = Mem->getMemOperand();
1986 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a scalar machine store (NVPTX::ST_*) for the StoreSDNode \p N.
///
/// Indexed (pre/post inc/dec) stores are not supported, and the memory VT is
/// checked for being simple before it is used. The opcode is picked from the
/// value type of the stored operand and the addressing form that matches the
/// address operand N2:
///   - direct address        -> ST_*_avar
///   - SelectADDRsi[64] form -> ST_*_asi
///   - SelectADDRri[64] form -> ST_*_ari / ST_*_ari_64
///   - anything else         -> ST_*_areg / ST_*_areg_64
/// Every form passes the stored value followed by the volatile flag, address
/// space, vector arity, to-type and to-type width as i32 immediates, then the
/// address operand(s). The memory operand of \p N is attached to the result.
1991 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1993 StoreSDNode *ST = cast<StoreSDNode>(N);
1994 EVT StoreVT = ST->getMemoryVT();
1995 SDNode *NVPTXST = nullptr;
1997 // do not support pre/post inc/dec
1998 if (ST->isIndexed())
2001 if (!StoreVT.isSimple())
2004 // Address Space Setting
2005 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2008 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also names GENERIC alongside GLOBAL and
// SHARED, which the comment above does not mention.
2009 bool isVolatile = ST->isVolatile();
2010 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2011 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2012 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity defaults to Scalar and is refined from the element count when
// the memory VT is a vector.
2016 MVT SimpleVT = StoreVT.getSimpleVT();
2017 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2018 if (SimpleVT.isVector()) {
2019 unsigned num = SimpleVT.getVectorNumElements();
2021 vecType = NVPTX::PTXLdStInstCode::V2;
2023 vecType = NVPTX::PTXLdStInstCode::V4;
2028 // Type Setting: toType + toTypeWidth
2029 // - for integer type, always use 'u'
2031 MVT ScalarVT = SimpleVT.getScalarType();
2032 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2033 unsigned int toType;
2034 if (ScalarVT.isFloatingPoint())
2035 toType = NVPTX::PTXLdStInstCode::Float;
2037 toType = NVPTX::PTXLdStInstCode::Unsigned;
2039 // Create the machine instruction DAG
// Operand 0 is the chain, operand 1 the value to store, operand 2 the address.
2040 SDValue Chain = N->getOperand(0);
2041 SDValue N1 = N->getOperand(1);
2042 SDValue N2 = N->getOperand(2);
2044 SDValue Offset, Base;
// The opcode is keyed on the type of the value being stored.
2046 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Addressing form 1: direct address.
2048 if (SelectDirectAddr(N2, Addr)) {
2051 Opcode = NVPTX::ST_i8_avar;
2054 Opcode = NVPTX::ST_i16_avar;
2057 Opcode = NVPTX::ST_i32_avar;
2060 Opcode = NVPTX::ST_i64_avar;
2063 Opcode = NVPTX::ST_f32_avar;
2066 Opcode = NVPTX::ST_f64_avar;
2071 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2072 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2073 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2075 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 2: whatever SelectADDRsi / SelectADDRsi64 matches.
2076 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2077 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2080 Opcode = NVPTX::ST_i8_asi;
2083 Opcode = NVPTX::ST_i16_asi;
2086 Opcode = NVPTX::ST_i32_asi;
2089 Opcode = NVPTX::ST_i64_asi;
2092 Opcode = NVPTX::ST_f32_asi;
2095 Opcode = NVPTX::ST_f64_asi;
2100 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2101 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2102 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2104 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 3: whatever SelectADDRri / SelectADDRri64 matches.
2105 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2106 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2110 Opcode = NVPTX::ST_i8_ari_64;
2113 Opcode = NVPTX::ST_i16_ari_64;
2116 Opcode = NVPTX::ST_i32_ari_64;
2119 Opcode = NVPTX::ST_i64_ari_64;
2122 Opcode = NVPTX::ST_f32_ari_64;
2125 Opcode = NVPTX::ST_f64_ari_64;
2133 Opcode = NVPTX::ST_i8_ari;
2136 Opcode = NVPTX::ST_i16_ari;
2139 Opcode = NVPTX::ST_i32_ari;
2142 Opcode = NVPTX::ST_i64_ari;
2145 Opcode = NVPTX::ST_f32_ari;
2148 Opcode = NVPTX::ST_f64_ari;
2154 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2155 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2156 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2158 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing form 4: the raw address operand N2 is used as-is.
2163 Opcode = NVPTX::ST_i8_areg_64;
2166 Opcode = NVPTX::ST_i16_areg_64;
2169 Opcode = NVPTX::ST_i32_areg_64;
2172 Opcode = NVPTX::ST_i64_areg_64;
2175 Opcode = NVPTX::ST_f32_areg_64;
2178 Opcode = NVPTX::ST_f64_areg_64;
2186 Opcode = NVPTX::ST_i8_areg;
2189 Opcode = NVPTX::ST_i16_areg;
2192 Opcode = NVPTX::ST_i32_areg;
2195 Opcode = NVPTX::ST_i64_areg;
2198 Opcode = NVPTX::ST_f32_areg;
2201 Opcode = NVPTX::ST_f64_areg;
2207 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2208 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2209 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2211 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the original node's memory operand over to the selected machine node.
2215 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2216 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2217 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a vector machine store (NVPTX::STV_*) for an NVPTXISD::StoreV2 or
/// NVPTXISD::StoreV4 node.
///
/// The operand list is built in this order: the 2 or 4 stored values, the
/// volatile flag, address space, vector arity, to-type and to-type width as
/// i32 immediates, the address operand(s) for the matched addressing form
/// (Addr, or Base + Offset, or the raw address N2), and finally the chain.
/// Storing through a pointer in the constant address space is a fatal error.
/// The memory operand of \p N is attached to the resulting machine node.
2223 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2224 SDValue Chain = N->getOperand(0);
2225 SDValue Op1 = N->getOperand(1);
2226 SDValue Addr, Offset, Base;
// The opcode is keyed on the type of the first stored value.
2230 EVT EltVT = Op1.getValueType();
2231 MemSDNode *MemSD = cast<MemSDNode>(N);
2232 EVT StoreVT = MemSD->getMemoryVT();
2234 // Address Space Setting
2235 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2237 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2238 report_fatal_error("Cannot store to pointer that points to constant "
2243 // - .volatile is only available for .global and .shared
2244 bool IsVolatile = MemSD->isVolatile();
2245 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2246 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2247 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2250 // Type Setting: toType + toTypeWidth
2251 // - for integer type, always use 'u'
2252 assert(StoreVT.isSimple() && "Store value is not simple");
2253 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2254 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2256 if (ScalarVT.isFloatingPoint())
2257 ToType = NVPTX::PTXLdStInstCode::Float;
2259 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2261 SmallVector<SDValue, 12> StOps;
// Collect the stored values; the address operand follows them in N's operand
// list (index 3 for StoreV2, index 5 for StoreV4).
2265 switch (N->getOpcode()) {
2266 case NVPTXISD::StoreV2:
2267 VecType = NVPTX::PTXLdStInstCode::V2;
2268 StOps.push_back(N->getOperand(1));
2269 StOps.push_back(N->getOperand(2));
2270 N2 = N->getOperand(3);
2272 case NVPTXISD::StoreV4:
2273 VecType = NVPTX::PTXLdStInstCode::V4;
2274 StOps.push_back(N->getOperand(1));
2275 StOps.push_back(N->getOperand(2));
2276 StOps.push_back(N->getOperand(3));
2277 StOps.push_back(N->getOperand(4));
2278 N2 = N->getOperand(5);
// Immediate operands shared by every addressing form.
2284 StOps.push_back(getI32Imm(IsVolatile, DL));
2285 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2286 StOps.push_back(getI32Imm(VecType, DL));
2287 StOps.push_back(getI32Imm(ToType, DL));
2288 StOps.push_back(getI32Imm(ToTypeWidth, DL));
// Addressing form 1: direct address.
2290 if (SelectDirectAddr(N2, Addr)) {
2291 switch (N->getOpcode()) {
2294 case NVPTXISD::StoreV2:
2295 switch (EltVT.getSimpleVT().SimpleTy) {
2299 Opcode = NVPTX::STV_i8_v2_avar;
2302 Opcode = NVPTX::STV_i16_v2_avar;
2305 Opcode = NVPTX::STV_i32_v2_avar;
2308 Opcode = NVPTX::STV_i64_v2_avar;
2311 Opcode = NVPTX::STV_f32_v2_avar;
2314 Opcode = NVPTX::STV_f64_v2_avar;
2318 case NVPTXISD::StoreV4:
2319 switch (EltVT.getSimpleVT().SimpleTy) {
2323 Opcode = NVPTX::STV_i8_v4_avar;
2326 Opcode = NVPTX::STV_i16_v4_avar;
2329 Opcode = NVPTX::STV_i32_v4_avar;
2332 Opcode = NVPTX::STV_f32_v4_avar;
2337 StOps.push_back(Addr);
// Addressing form 2: whatever SelectADDRsi / SelectADDRsi64 matches.
2338 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2339 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2340 switch (N->getOpcode()) {
2343 case NVPTXISD::StoreV2:
2344 switch (EltVT.getSimpleVT().SimpleTy) {
2348 Opcode = NVPTX::STV_i8_v2_asi;
2351 Opcode = NVPTX::STV_i16_v2_asi;
2354 Opcode = NVPTX::STV_i32_v2_asi;
2357 Opcode = NVPTX::STV_i64_v2_asi;
2360 Opcode = NVPTX::STV_f32_v2_asi;
2363 Opcode = NVPTX::STV_f64_v2_asi;
2367 case NVPTXISD::StoreV4:
2368 switch (EltVT.getSimpleVT().SimpleTy) {
2372 Opcode = NVPTX::STV_i8_v4_asi;
2375 Opcode = NVPTX::STV_i16_v4_asi;
2378 Opcode = NVPTX::STV_i32_v4_asi;
2381 Opcode = NVPTX::STV_f32_v4_asi;
2386 StOps.push_back(Base);
2387 StOps.push_back(Offset);
// Addressing form 3: whatever SelectADDRri / SelectADDRri64 matches.
2388 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2389 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2391 switch (N->getOpcode()) {
2394 case NVPTXISD::StoreV2:
2395 switch (EltVT.getSimpleVT().SimpleTy) {
2399 Opcode = NVPTX::STV_i8_v2_ari_64;
2402 Opcode = NVPTX::STV_i16_v2_ari_64;
2405 Opcode = NVPTX::STV_i32_v2_ari_64;
2408 Opcode = NVPTX::STV_i64_v2_ari_64;
2411 Opcode = NVPTX::STV_f32_v2_ari_64;
2414 Opcode = NVPTX::STV_f64_v2_ari_64;
2418 case NVPTXISD::StoreV4:
2419 switch (EltVT.getSimpleVT().SimpleTy) {
2423 Opcode = NVPTX::STV_i8_v4_ari_64;
2426 Opcode = NVPTX::STV_i16_v4_ari_64;
2429 Opcode = NVPTX::STV_i32_v4_ari_64;
2432 Opcode = NVPTX::STV_f32_v4_ari_64;
2438 switch (N->getOpcode()) {
2441 case NVPTXISD::StoreV2:
2442 switch (EltVT.getSimpleVT().SimpleTy) {
2446 Opcode = NVPTX::STV_i8_v2_ari;
2449 Opcode = NVPTX::STV_i16_v2_ari;
2452 Opcode = NVPTX::STV_i32_v2_ari;
2455 Opcode = NVPTX::STV_i64_v2_ari;
2458 Opcode = NVPTX::STV_f32_v2_ari;
2461 Opcode = NVPTX::STV_f64_v2_ari;
2465 case NVPTXISD::StoreV4:
2466 switch (EltVT.getSimpleVT().SimpleTy) {
2470 Opcode = NVPTX::STV_i8_v4_ari;
2473 Opcode = NVPTX::STV_i16_v4_ari;
2476 Opcode = NVPTX::STV_i32_v4_ari;
2479 Opcode = NVPTX::STV_f32_v4_ari;
2485 StOps.push_back(Base);
2486 StOps.push_back(Offset);
// Addressing form 4: the raw address operand N2 is used as-is.
2489 switch (N->getOpcode()) {
2492 case NVPTXISD::StoreV2:
2493 switch (EltVT.getSimpleVT().SimpleTy) {
2497 Opcode = NVPTX::STV_i8_v2_areg_64;
2500 Opcode = NVPTX::STV_i16_v2_areg_64;
2503 Opcode = NVPTX::STV_i32_v2_areg_64;
2506 Opcode = NVPTX::STV_i64_v2_areg_64;
2509 Opcode = NVPTX::STV_f32_v2_areg_64;
2512 Opcode = NVPTX::STV_f64_v2_areg_64;
2516 case NVPTXISD::StoreV4:
2517 switch (EltVT.getSimpleVT().SimpleTy) {
2521 Opcode = NVPTX::STV_i8_v4_areg_64;
2524 Opcode = NVPTX::STV_i16_v4_areg_64;
2527 Opcode = NVPTX::STV_i32_v4_areg_64;
2530 Opcode = NVPTX::STV_f32_v4_areg_64;
2536 switch (N->getOpcode()) {
2539 case NVPTXISD::StoreV2:
2540 switch (EltVT.getSimpleVT().SimpleTy) {
2544 Opcode = NVPTX::STV_i8_v2_areg;
2547 Opcode = NVPTX::STV_i16_v2_areg;
2550 Opcode = NVPTX::STV_i32_v2_areg;
2553 Opcode = NVPTX::STV_i64_v2_areg;
2556 Opcode = NVPTX::STV_f32_v2_areg;
2559 Opcode = NVPTX::STV_f64_v2_areg;
2563 case NVPTXISD::StoreV4:
2564 switch (EltVT.getSimpleVT().SimpleTy) {
2568 Opcode = NVPTX::STV_i8_v4_areg;
2571 Opcode = NVPTX::STV_i16_v4_areg;
2574 Opcode = NVPTX::STV_i32_v4_areg;
2577 Opcode = NVPTX::STV_f32_v4_areg;
2583 StOps.push_back(N2);
// The chain is always the last operand.
2586 StOps.push_back(Chain);
2588 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the original node's memory operand over to the selected machine node.
2590 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2591 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2592 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a LoadParamMem* machine node for an NVPTXISD::LoadParam,
/// LoadParamV2 or LoadParamV4 node.
///
/// The opcode is picked from the node's memory VT. The result VT list holds
/// one, two or four values of the node's first result type, followed by
/// MVT::Other and MVT::Glue. The machine node's operands are the constant
/// offset (re-emitted as an i32 target constant), the chain and the glue flag.
2597 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Operand 0 is the chain, operand 2 the offset, operand 3 the glue flag.
2598 SDValue Chain = Node->getOperand(0);
2599 SDValue Offset = Node->getOperand(2);
2600 SDValue Flag = Node->getOperand(3);
2602 MemSDNode *Mem = cast<MemSDNode>(Node);
2605 switch (Node->getOpcode()) {
2608 case NVPTXISD::LoadParam:
2611 case NVPTXISD::LoadParamV2:
2614 case NVPTXISD::LoadParamV4:
2619 EVT EltVT = Node->getValueType(0);
2620 EVT MemVT = Mem->getMemoryVT();
// NOTE(review): each group below assigns the I8 opcode twice in a row;
// presumably two narrow memory types share the 8-bit load -- confirm.
2628 switch (MemVT.getSimpleVT().SimpleTy) {
2632 Opc = NVPTX::LoadParamMemI8;
2635 Opc = NVPTX::LoadParamMemI8;
2638 Opc = NVPTX::LoadParamMemI16;
2641 Opc = NVPTX::LoadParamMemI32;
2644 Opc = NVPTX::LoadParamMemI64;
2647 Opc = NVPTX::LoadParamMemF32;
2650 Opc = NVPTX::LoadParamMemF64;
2655 switch (MemVT.getSimpleVT().SimpleTy) {
2659 Opc = NVPTX::LoadParamMemV2I8;
2662 Opc = NVPTX::LoadParamMemV2I8;
2665 Opc = NVPTX::LoadParamMemV2I16;
2668 Opc = NVPTX::LoadParamMemV2I32;
2671 Opc = NVPTX::LoadParamMemV2I64;
2674 Opc = NVPTX::LoadParamMemV2F32;
2677 Opc = NVPTX::LoadParamMemV2F64;
2682 switch (MemVT.getSimpleVT().SimpleTy) {
2686 Opc = NVPTX::LoadParamMemV4I8;
2689 Opc = NVPTX::LoadParamMemV4I8;
2692 Opc = NVPTX::LoadParamMemV4I16;
2695 Opc = NVPTX::LoadParamMemV4I32;
2698 Opc = NVPTX::LoadParamMemV4F32;
// Result types: one EltVT per loaded element, then chain and glue.
2706 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2707 } else if (VecSize == 2) {
2708 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2710 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2711 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a constant; it is re-emitted as a target constant.
2714 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2716 SmallVector<SDValue, 2> Ops;
2717 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2718 Ops.push_back(Chain);
2719 Ops.push_back(Flag);
2721 return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// Select a StoreRetval* machine node for an NVPTXISD::StoreRetval,
/// StoreRetvalV2 or StoreRetvalV4 node.
///
/// The machine node's operands are the NumElts stored values (node operands
/// starting at index 2), the constant offset as an i32 target constant, and
/// the chain. The opcode is picked from the node's memory VT, and the node's
/// memory operand is attached to the result.
2724 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Operand 0 is the chain; operand 1 is the offset, which must be a constant.
2726 SDValue Chain = N->getOperand(0);
2727 SDValue Offset = N->getOperand(1);
2728 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2729 MemSDNode *Mem = cast<MemSDNode>(N);
2731 // How many elements do we have?
2732 unsigned NumElts = 1;
2733 switch (N->getOpcode()) {
2736 case NVPTXISD::StoreRetval:
2739 case NVPTXISD::StoreRetvalV2:
2742 case NVPTXISD::StoreRetvalV4:
2747 // Build vector of operands
2748 SmallVector<SDValue, 6> Ops;
2749 for (unsigned i = 0; i < NumElts; ++i)
2750 Ops.push_back(N->getOperand(i + 2));
2751 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2752 Ops.push_back(Chain);
2754 // Determine target opcode
2755 // If we have an i1, use an 8-bit store. The lowering code in
2756 // NVPTXISelLowering will have already emitted an upcast.
2757 unsigned Opcode = 0;
2762 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2766 Opcode = NVPTX::StoreRetvalI8;
2769 Opcode = NVPTX::StoreRetvalI8;
2772 Opcode = NVPTX::StoreRetvalI16;
2775 Opcode = NVPTX::StoreRetvalI32;
2778 Opcode = NVPTX::StoreRetvalI64;
2781 Opcode = NVPTX::StoreRetvalF32;
2784 Opcode = NVPTX::StoreRetvalF64;
2789 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2793 Opcode = NVPTX::StoreRetvalV2I8;
2796 Opcode = NVPTX::StoreRetvalV2I8;
2799 Opcode = NVPTX::StoreRetvalV2I16;
2802 Opcode = NVPTX::StoreRetvalV2I32;
2805 Opcode = NVPTX::StoreRetvalV2I64;
2808 Opcode = NVPTX::StoreRetvalV2F32;
2811 Opcode = NVPTX::StoreRetvalV2F64;
2816 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2820 Opcode = NVPTX::StoreRetvalV4I8;
2823 Opcode = NVPTX::StoreRetvalV4I8;
2826 Opcode = NVPTX::StoreRetvalV4I16;
2829 Opcode = NVPTX::StoreRetvalV4I32;
2832 Opcode = NVPTX::StoreRetvalV4F32;
2839 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the original node's memory operand over to the selected machine node.
2840 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2841 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2842 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a StoreParam* machine node for an NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// The machine node's operands are the NumElts stored values (node operands
/// starting at index 3), the parameter index and the offset as i32 target
/// constants, the chain, and the glue flag (the node's last operand). For
/// StoreParamU32 / StoreParamS32 a CVT_u32_u16 / CVT_s32_s16 conversion is
/// inserted first and its result replaces the stored value; both then use
/// StoreParamI32. The result types are MVT::Other and MVT::Glue, and the
/// node's memory operand is attached to the result.
2847 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Operand 0 is the chain; operands 1 and 2 (parameter index and offset) must
// both be constants.
2849 SDValue Chain = N->getOperand(0);
2850 SDValue Param = N->getOperand(1);
2851 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2852 SDValue Offset = N->getOperand(2);
2853 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2854 MemSDNode *Mem = cast<MemSDNode>(N);
2855 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2857 // How many elements do we have?
2858 unsigned NumElts = 1;
2859 switch (N->getOpcode()) {
2862 case NVPTXISD::StoreParamU32:
2863 case NVPTXISD::StoreParamS32:
2864 case NVPTXISD::StoreParam:
2867 case NVPTXISD::StoreParamV2:
2870 case NVPTXISD::StoreParamV4:
2875 // Build vector of operands
2876 SmallVector<SDValue, 8> Ops;
2877 for (unsigned i = 0; i < NumElts; ++i)
2878 Ops.push_back(N->getOperand(i + 3));
2879 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2880 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2881 Ops.push_back(Chain);
2882 Ops.push_back(Flag);
2884 // Determine target opcode
2885 // If we have an i1, use an 8-bit store. The lowering code in
2886 // NVPTXISelLowering will have already emitted an upcast.
2887 unsigned Opcode = 0;
2888 switch (N->getOpcode()) {
2894 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2898 Opcode = NVPTX::StoreParamI8;
2901 Opcode = NVPTX::StoreParamI8;
2904 Opcode = NVPTX::StoreParamI16;
2907 Opcode = NVPTX::StoreParamI32;
2910 Opcode = NVPTX::StoreParamI64;
2913 Opcode = NVPTX::StoreParamF32;
2916 Opcode = NVPTX::StoreParamF64;
2921 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2925 Opcode = NVPTX::StoreParamV2I8;
2928 Opcode = NVPTX::StoreParamV2I8;
2931 Opcode = NVPTX::StoreParamV2I16;
2934 Opcode = NVPTX::StoreParamV2I32;
2937 Opcode = NVPTX::StoreParamV2I64;
2940 Opcode = NVPTX::StoreParamV2F32;
2943 Opcode = NVPTX::StoreParamV2F64;
2948 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2952 Opcode = NVPTX::StoreParamV4I8;
2955 Opcode = NVPTX::StoreParamV4I8;
2958 Opcode = NVPTX::StoreParamV4I16;
2961 Opcode = NVPTX::StoreParamV4I32;
2964 Opcode = NVPTX::StoreParamV4F32;
2970 // Special case: if we have a sign-extend/zero-extend node, insert the
2971 // conversion instruction first, and use that as the value operand to
2972 // the selected StoreParam node.
2973 case NVPTXISD::StoreParamU32: {
2974 Opcode = NVPTX::StoreParamI32;
2975 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2977 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2978 MVT::i32, Ops[0], CvtNone);
// The converted i32 value replaces the original stored value.
2979 Ops[0] = SDValue(Cvt, 0);
2982 case NVPTXISD::StoreParamS32: {
2983 Opcode = NVPTX::StoreParamI32;
2984 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
2986 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2987 MVT::i32, Ops[0], CvtNone);
// The converted i32 value replaces the original stored value.
2988 Ops[0] = SDValue(Cvt, 0);
2993 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2995 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the original node's memory operand over to the selected machine node.
2996 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2997 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2998 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3003 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3004 SDValue Chain = N->getOperand(0);
3005 SDNode *Ret = nullptr;
3007 SmallVector<SDValue, 8> Ops;
3009 switch (N->getOpcode()) {
3010 default: return nullptr;
3011 case NVPTXISD::Tex1DFloatS32:
3012 Opc = NVPTX::TEX_1D_F32_S32;
3014 case NVPTXISD::Tex1DFloatFloat:
3015 Opc = NVPTX::TEX_1D_F32_F32;
3017 case NVPTXISD::Tex1DFloatFloatLevel:
3018 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3020 case NVPTXISD::Tex1DFloatFloatGrad:
3021 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3023 case NVPTXISD::Tex1DS32S32:
3024 Opc = NVPTX::TEX_1D_S32_S32;
3026 case NVPTXISD::Tex1DS32Float:
3027 Opc = NVPTX::TEX_1D_S32_F32;
3029 case NVPTXISD::Tex1DS32FloatLevel:
3030 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3032 case NVPTXISD::Tex1DS32FloatGrad:
3033 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3035 case NVPTXISD::Tex1DU32S32:
3036 Opc = NVPTX::TEX_1D_U32_S32;
3038 case NVPTXISD::Tex1DU32Float:
3039 Opc = NVPTX::TEX_1D_U32_F32;
3041 case NVPTXISD::Tex1DU32FloatLevel:
3042 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3044 case NVPTXISD::Tex1DU32FloatGrad:
3045 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3047 case NVPTXISD::Tex1DArrayFloatS32:
3048 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3050 case NVPTXISD::Tex1DArrayFloatFloat:
3051 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3053 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3054 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3056 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3057 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3059 case NVPTXISD::Tex1DArrayS32S32:
3060 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3062 case NVPTXISD::Tex1DArrayS32Float:
3063 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3065 case NVPTXISD::Tex1DArrayS32FloatLevel:
3066 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3068 case NVPTXISD::Tex1DArrayS32FloatGrad:
3069 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3071 case NVPTXISD::Tex1DArrayU32S32:
3072 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3074 case NVPTXISD::Tex1DArrayU32Float:
3075 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3077 case NVPTXISD::Tex1DArrayU32FloatLevel:
3078 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3080 case NVPTXISD::Tex1DArrayU32FloatGrad:
3081 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3083 case NVPTXISD::Tex2DFloatS32:
3084 Opc = NVPTX::TEX_2D_F32_S32;
3086 case NVPTXISD::Tex2DFloatFloat:
3087 Opc = NVPTX::TEX_2D_F32_F32;
3089 case NVPTXISD::Tex2DFloatFloatLevel:
3090 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3092 case NVPTXISD::Tex2DFloatFloatGrad:
3093 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3095 case NVPTXISD::Tex2DS32S32:
3096 Opc = NVPTX::TEX_2D_S32_S32;
3098 case NVPTXISD::Tex2DS32Float:
3099 Opc = NVPTX::TEX_2D_S32_F32;
3101 case NVPTXISD::Tex2DS32FloatLevel:
3102 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3104 case NVPTXISD::Tex2DS32FloatGrad:
3105 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3107 case NVPTXISD::Tex2DU32S32:
3108 Opc = NVPTX::TEX_2D_U32_S32;
3110 case NVPTXISD::Tex2DU32Float:
3111 Opc = NVPTX::TEX_2D_U32_F32;
3113 case NVPTXISD::Tex2DU32FloatLevel:
3114 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3116 case NVPTXISD::Tex2DU32FloatGrad:
3117 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3119 case NVPTXISD::Tex2DArrayFloatS32:
3120 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3122 case NVPTXISD::Tex2DArrayFloatFloat:
3123 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3125 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3126 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3128 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3129 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3131 case NVPTXISD::Tex2DArrayS32S32:
3132 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3134 case NVPTXISD::Tex2DArrayS32Float:
3135 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3137 case NVPTXISD::Tex2DArrayS32FloatLevel:
3138 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3140 case NVPTXISD::Tex2DArrayS32FloatGrad:
3141 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3143 case NVPTXISD::Tex2DArrayU32S32:
3144 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3146 case NVPTXISD::Tex2DArrayU32Float:
3147 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3149 case NVPTXISD::Tex2DArrayU32FloatLevel:
3150 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3152 case NVPTXISD::Tex2DArrayU32FloatGrad:
3153 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3155 case NVPTXISD::Tex3DFloatS32:
3156 Opc = NVPTX::TEX_3D_F32_S32;
3158 case NVPTXISD::Tex3DFloatFloat:
3159 Opc = NVPTX::TEX_3D_F32_F32;
3161 case NVPTXISD::Tex3DFloatFloatLevel:
3162 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3164 case NVPTXISD::Tex3DFloatFloatGrad:
3165 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3167 case NVPTXISD::Tex3DS32S32:
3168 Opc = NVPTX::TEX_3D_S32_S32;
3170 case NVPTXISD::Tex3DS32Float:
3171 Opc = NVPTX::TEX_3D_S32_F32;
3173 case NVPTXISD::Tex3DS32FloatLevel:
3174 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3176 case NVPTXISD::Tex3DS32FloatGrad:
3177 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3179 case NVPTXISD::Tex3DU32S32:
3180 Opc = NVPTX::TEX_3D_U32_S32;
3182 case NVPTXISD::Tex3DU32Float:
3183 Opc = NVPTX::TEX_3D_U32_F32;
3185 case NVPTXISD::Tex3DU32FloatLevel:
3186 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3188 case NVPTXISD::Tex3DU32FloatGrad:
3189 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3191 case NVPTXISD::TexCubeFloatFloat:
3192 Opc = NVPTX::TEX_CUBE_F32_F32;
3194 case NVPTXISD::TexCubeFloatFloatLevel:
3195 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3197 case NVPTXISD::TexCubeS32Float:
3198 Opc = NVPTX::TEX_CUBE_S32_F32;
3200 case NVPTXISD::TexCubeS32FloatLevel:
3201 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3203 case NVPTXISD::TexCubeU32Float:
3204 Opc = NVPTX::TEX_CUBE_U32_F32;
3206 case NVPTXISD::TexCubeU32FloatLevel:
3207 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3209 case NVPTXISD::TexCubeArrayFloatFloat:
3210 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3212 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3213 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3215 case NVPTXISD::TexCubeArrayS32Float:
3216 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3218 case NVPTXISD::TexCubeArrayS32FloatLevel:
3219 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3221 case NVPTXISD::TexCubeArrayU32Float:
3222 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3224 case NVPTXISD::TexCubeArrayU32FloatLevel:
3225 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3227 case NVPTXISD::Tld4R2DFloatFloat:
3228 Opc = NVPTX::TLD4_R_2D_F32_F32;
3230 case NVPTXISD::Tld4G2DFloatFloat:
3231 Opc = NVPTX::TLD4_G_2D_F32_F32;
3233 case NVPTXISD::Tld4B2DFloatFloat:
3234 Opc = NVPTX::TLD4_B_2D_F32_F32;
3236 case NVPTXISD::Tld4A2DFloatFloat:
3237 Opc = NVPTX::TLD4_A_2D_F32_F32;
3239 case NVPTXISD::Tld4R2DS64Float:
3240 Opc = NVPTX::TLD4_R_2D_S32_F32;
3242 case NVPTXISD::Tld4G2DS64Float:
3243 Opc = NVPTX::TLD4_G_2D_S32_F32;
3245 case NVPTXISD::Tld4B2DS64Float:
3246 Opc = NVPTX::TLD4_B_2D_S32_F32;
3248 case NVPTXISD::Tld4A2DS64Float:
3249 Opc = NVPTX::TLD4_A_2D_S32_F32;
3251 case NVPTXISD::Tld4R2DU64Float:
3252 Opc = NVPTX::TLD4_R_2D_U32_F32;
3254 case NVPTXISD::Tld4G2DU64Float:
3255 Opc = NVPTX::TLD4_G_2D_U32_F32;
3257 case NVPTXISD::Tld4B2DU64Float:
3258 Opc = NVPTX::TLD4_B_2D_U32_F32;
3260 case NVPTXISD::Tld4A2DU64Float:
3261 Opc = NVPTX::TLD4_A_2D_U32_F32;
3263 case NVPTXISD::TexUnified1DFloatS32:
3264 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3266 case NVPTXISD::TexUnified1DFloatFloat:
3267 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3269 case NVPTXISD::TexUnified1DFloatFloatLevel:
3270 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3272 case NVPTXISD::TexUnified1DFloatFloatGrad:
3273 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3275 case NVPTXISD::TexUnified1DS32S32:
3276 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3278 case NVPTXISD::TexUnified1DS32Float:
3279 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3281 case NVPTXISD::TexUnified1DS32FloatLevel:
3282 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3284 case NVPTXISD::TexUnified1DS32FloatGrad:
3285 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3287 case NVPTXISD::TexUnified1DU32S32:
3288 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3290 case NVPTXISD::TexUnified1DU32Float:
3291 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3293 case NVPTXISD::TexUnified1DU32FloatLevel:
3294 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3296 case NVPTXISD::TexUnified1DU32FloatGrad:
3297 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3299 case NVPTXISD::TexUnified1DArrayFloatS32:
3300 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3302 case NVPTXISD::TexUnified1DArrayFloatFloat:
3303 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3305 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3306 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3308 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3309 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3311 case NVPTXISD::TexUnified1DArrayS32S32:
3312 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3314 case NVPTXISD::TexUnified1DArrayS32Float:
3315 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3317 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3318 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3320 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3321 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3323 case NVPTXISD::TexUnified1DArrayU32S32:
3324 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3326 case NVPTXISD::TexUnified1DArrayU32Float:
3327 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3329 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3330 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3332 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3333 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3335 case NVPTXISD::TexUnified2DFloatS32:
3336 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3338 case NVPTXISD::TexUnified2DFloatFloat:
3339 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3341 case NVPTXISD::TexUnified2DFloatFloatLevel:
3342 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3344 case NVPTXISD::TexUnified2DFloatFloatGrad:
3345 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3347 case NVPTXISD::TexUnified2DS32S32:
3348 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3350 case NVPTXISD::TexUnified2DS32Float:
3351 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3353 case NVPTXISD::TexUnified2DS32FloatLevel:
3354 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3356 case NVPTXISD::TexUnified2DS32FloatGrad:
3357 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3359 case NVPTXISD::TexUnified2DU32S32:
3360 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3362 case NVPTXISD::TexUnified2DU32Float:
3363 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3365 case NVPTXISD::TexUnified2DU32FloatLevel:
3366 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3368 case NVPTXISD::TexUnified2DU32FloatGrad:
3369 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3371 case NVPTXISD::TexUnified2DArrayFloatS32:
3372 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3374 case NVPTXISD::TexUnified2DArrayFloatFloat:
3375 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3377 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3378 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3380 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3381 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3383 case NVPTXISD::TexUnified2DArrayS32S32:
3384 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3386 case NVPTXISD::TexUnified2DArrayS32Float:
3387 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3389 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3390 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3392 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3393 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3395 case NVPTXISD::TexUnified2DArrayU32S32:
3396 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3398 case NVPTXISD::TexUnified2DArrayU32Float:
3399 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3401 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3402 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3404 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3405 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3407 case NVPTXISD::TexUnified3DFloatS32:
3408 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3410 case NVPTXISD::TexUnified3DFloatFloat:
3411 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3413 case NVPTXISD::TexUnified3DFloatFloatLevel:
3414 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3416 case NVPTXISD::TexUnified3DFloatFloatGrad:
3417 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3419 case NVPTXISD::TexUnified3DS32S32:
3420 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3422 case NVPTXISD::TexUnified3DS32Float:
3423 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3425 case NVPTXISD::TexUnified3DS32FloatLevel:
3426 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3428 case NVPTXISD::TexUnified3DS32FloatGrad:
3429 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3431 case NVPTXISD::TexUnified3DU32S32:
3432 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3434 case NVPTXISD::TexUnified3DU32Float:
3435 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3437 case NVPTXISD::TexUnified3DU32FloatLevel:
3438 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3440 case NVPTXISD::TexUnified3DU32FloatGrad:
3441 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3443 case NVPTXISD::TexUnifiedCubeFloatFloat:
3444 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3446 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3447 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3449 case NVPTXISD::TexUnifiedCubeS32Float:
3450 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3452 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3453 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3455 case NVPTXISD::TexUnifiedCubeU32Float:
3456 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3458 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3459 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3461 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3462 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3464 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3465 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3467 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3468 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3470 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3471 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3473 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3474 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3476 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3477 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3479 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3480 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3482 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3483 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3485 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3486 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3488 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3489 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3491 case NVPTXISD::Tld4UnifiedR2DS64Float:
3492 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3494 case NVPTXISD::Tld4UnifiedG2DS64Float:
3495 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3497 case NVPTXISD::Tld4UnifiedB2DS64Float:
3498 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3500 case NVPTXISD::Tld4UnifiedA2DS64Float:
3501 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3503 case NVPTXISD::Tld4UnifiedR2DU64Float:
3504 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3506 case NVPTXISD::Tld4UnifiedG2DU64Float:
3507 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3509 case NVPTXISD::Tld4UnifiedB2DU64Float:
3510 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3512 case NVPTXISD::Tld4UnifiedA2DU64Float:
3513 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3517 // Copy over operands
3518 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3519 Ops.push_back(N->getOperand(i));
3522 Ops.push_back(Chain);
3523 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3527 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3528 SDValue Chain = N->getOperand(0);
3529 SDValue TexHandle = N->getOperand(1);
3530 SDNode *Ret = nullptr;
3532 SmallVector<SDValue, 8> Ops;
3533 switch (N->getOpcode()) {
3534 default: return nullptr;
3535 case NVPTXISD::Suld1DI8Clamp:
3536 Opc = NVPTX::SULD_1D_I8_CLAMP;
3537 Ops.push_back(TexHandle);
3538 Ops.push_back(N->getOperand(2));
3539 Ops.push_back(Chain);
3541 case NVPTXISD::Suld1DI16Clamp:
3542 Opc = NVPTX::SULD_1D_I16_CLAMP;
3543 Ops.push_back(TexHandle);
3544 Ops.push_back(N->getOperand(2));
3545 Ops.push_back(Chain);
3547 case NVPTXISD::Suld1DI32Clamp:
3548 Opc = NVPTX::SULD_1D_I32_CLAMP;
3549 Ops.push_back(TexHandle);
3550 Ops.push_back(N->getOperand(2));
3551 Ops.push_back(Chain);
3553 case NVPTXISD::Suld1DI64Clamp:
3554 Opc = NVPTX::SULD_1D_I64_CLAMP;
3555 Ops.push_back(TexHandle);
3556 Ops.push_back(N->getOperand(2));
3557 Ops.push_back(Chain);
3559 case NVPTXISD::Suld1DV2I8Clamp:
3560 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3561 Ops.push_back(TexHandle);
3562 Ops.push_back(N->getOperand(2));
3563 Ops.push_back(Chain);
3565 case NVPTXISD::Suld1DV2I16Clamp:
3566 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3567 Ops.push_back(TexHandle);
3568 Ops.push_back(N->getOperand(2));
3569 Ops.push_back(Chain);
3571 case NVPTXISD::Suld1DV2I32Clamp:
3572 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3573 Ops.push_back(TexHandle);
3574 Ops.push_back(N->getOperand(2));
3575 Ops.push_back(Chain);
3577 case NVPTXISD::Suld1DV2I64Clamp:
3578 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3579 Ops.push_back(TexHandle);
3580 Ops.push_back(N->getOperand(2));
3581 Ops.push_back(Chain);
3583 case NVPTXISD::Suld1DV4I8Clamp:
3584 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3585 Ops.push_back(TexHandle);
3586 Ops.push_back(N->getOperand(2));
3587 Ops.push_back(Chain);
3589 case NVPTXISD::Suld1DV4I16Clamp:
3590 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3591 Ops.push_back(TexHandle);
3592 Ops.push_back(N->getOperand(2));
3593 Ops.push_back(Chain);
3595 case NVPTXISD::Suld1DV4I32Clamp:
3596 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3597 Ops.push_back(TexHandle);
3598 Ops.push_back(N->getOperand(2));
3599 Ops.push_back(Chain);
3601 case NVPTXISD::Suld1DArrayI8Clamp:
3602 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3603 Ops.push_back(TexHandle);
3604 Ops.push_back(N->getOperand(2));
3605 Ops.push_back(N->getOperand(3));
3606 Ops.push_back(Chain);
3608 case NVPTXISD::Suld1DArrayI16Clamp:
3609 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3610 Ops.push_back(TexHandle);
3611 Ops.push_back(N->getOperand(2));
3612 Ops.push_back(N->getOperand(3));
3613 Ops.push_back(Chain);
3615 case NVPTXISD::Suld1DArrayI32Clamp:
3616 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3617 Ops.push_back(TexHandle);
3618 Ops.push_back(N->getOperand(2));
3619 Ops.push_back(N->getOperand(3));
3620 Ops.push_back(Chain);
3622 case NVPTXISD::Suld1DArrayI64Clamp:
3623 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3624 Ops.push_back(TexHandle);
3625 Ops.push_back(N->getOperand(2));
3626 Ops.push_back(N->getOperand(3));
3627 Ops.push_back(Chain);
3629 case NVPTXISD::Suld1DArrayV2I8Clamp:
3630 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3631 Ops.push_back(TexHandle);
3632 Ops.push_back(N->getOperand(2));
3633 Ops.push_back(N->getOperand(3));
3634 Ops.push_back(Chain);
3636 case NVPTXISD::Suld1DArrayV2I16Clamp:
3637 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3638 Ops.push_back(TexHandle);
3639 Ops.push_back(N->getOperand(2));
3640 Ops.push_back(N->getOperand(3));
3641 Ops.push_back(Chain);
3643 case NVPTXISD::Suld1DArrayV2I32Clamp:
3644 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3645 Ops.push_back(TexHandle);
3646 Ops.push_back(N->getOperand(2));
3647 Ops.push_back(N->getOperand(3));
3648 Ops.push_back(Chain);
3650 case NVPTXISD::Suld1DArrayV2I64Clamp:
3651 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3652 Ops.push_back(TexHandle);
3653 Ops.push_back(N->getOperand(2));
3654 Ops.push_back(N->getOperand(3));
3655 Ops.push_back(Chain);
3657 case NVPTXISD::Suld1DArrayV4I8Clamp:
3658 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3659 Ops.push_back(TexHandle);
3660 Ops.push_back(N->getOperand(2));
3661 Ops.push_back(N->getOperand(3));
3662 Ops.push_back(Chain);
3664 case NVPTXISD::Suld1DArrayV4I16Clamp:
3665 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3666 Ops.push_back(TexHandle);
3667 Ops.push_back(N->getOperand(2));
3668 Ops.push_back(N->getOperand(3));
3669 Ops.push_back(Chain);
3671 case NVPTXISD::Suld1DArrayV4I32Clamp:
3672 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3673 Ops.push_back(TexHandle);
3674 Ops.push_back(N->getOperand(2));
3675 Ops.push_back(N->getOperand(3));
3676 Ops.push_back(Chain);
3678 case NVPTXISD::Suld2DI8Clamp:
3679 Opc = NVPTX::SULD_2D_I8_CLAMP;
3680 Ops.push_back(TexHandle);
3681 Ops.push_back(N->getOperand(2));
3682 Ops.push_back(N->getOperand(3));
3683 Ops.push_back(Chain);
3685 case NVPTXISD::Suld2DI16Clamp:
3686 Opc = NVPTX::SULD_2D_I16_CLAMP;
3687 Ops.push_back(TexHandle);
3688 Ops.push_back(N->getOperand(2));
3689 Ops.push_back(N->getOperand(3));
3690 Ops.push_back(Chain);
3692 case NVPTXISD::Suld2DI32Clamp:
3693 Opc = NVPTX::SULD_2D_I32_CLAMP;
3694 Ops.push_back(TexHandle);
3695 Ops.push_back(N->getOperand(2));
3696 Ops.push_back(N->getOperand(3));
3697 Ops.push_back(Chain);
3699 case NVPTXISD::Suld2DI64Clamp:
3700 Opc = NVPTX::SULD_2D_I64_CLAMP;
3701 Ops.push_back(TexHandle);
3702 Ops.push_back(N->getOperand(2));
3703 Ops.push_back(N->getOperand(3));
3704 Ops.push_back(Chain);
3706 case NVPTXISD::Suld2DV2I8Clamp:
3707 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3708 Ops.push_back(TexHandle);
3709 Ops.push_back(N->getOperand(2));
3710 Ops.push_back(N->getOperand(3));
3711 Ops.push_back(Chain);
3713 case NVPTXISD::Suld2DV2I16Clamp:
3714 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3715 Ops.push_back(TexHandle);
3716 Ops.push_back(N->getOperand(2));
3717 Ops.push_back(N->getOperand(3));
3718 Ops.push_back(Chain);
3720 case NVPTXISD::Suld2DV2I32Clamp:
3721 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3722 Ops.push_back(TexHandle);
3723 Ops.push_back(N->getOperand(2));
3724 Ops.push_back(N->getOperand(3));
3725 Ops.push_back(Chain);
3727 case NVPTXISD::Suld2DV2I64Clamp:
3728 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3729 Ops.push_back(TexHandle);
3730 Ops.push_back(N->getOperand(2));
3731 Ops.push_back(N->getOperand(3));
3732 Ops.push_back(Chain);
3734 case NVPTXISD::Suld2DV4I8Clamp:
3735 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3736 Ops.push_back(TexHandle);
3737 Ops.push_back(N->getOperand(2));
3738 Ops.push_back(N->getOperand(3));
3739 Ops.push_back(Chain);
3741 case NVPTXISD::Suld2DV4I16Clamp:
3742 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3743 Ops.push_back(TexHandle);
3744 Ops.push_back(N->getOperand(2));
3745 Ops.push_back(N->getOperand(3));
3746 Ops.push_back(Chain);
3748 case NVPTXISD::Suld2DV4I32Clamp:
3749 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3750 Ops.push_back(TexHandle);
3751 Ops.push_back(N->getOperand(2));
3752 Ops.push_back(N->getOperand(3));
3753 Ops.push_back(Chain);
3755 case NVPTXISD::Suld2DArrayI8Clamp:
3756 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3757 Ops.push_back(TexHandle);
3758 Ops.push_back(N->getOperand(2));
3759 Ops.push_back(N->getOperand(3));
3760 Ops.push_back(N->getOperand(4));
3761 Ops.push_back(Chain);
3763 case NVPTXISD::Suld2DArrayI16Clamp:
3764 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3765 Ops.push_back(TexHandle);
3766 Ops.push_back(N->getOperand(2));
3767 Ops.push_back(N->getOperand(3));
3768 Ops.push_back(N->getOperand(4));
3769 Ops.push_back(Chain);
3771 case NVPTXISD::Suld2DArrayI32Clamp:
3772 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3773 Ops.push_back(TexHandle);
3774 Ops.push_back(N->getOperand(2));
3775 Ops.push_back(N->getOperand(3));
3776 Ops.push_back(N->getOperand(4));
3777 Ops.push_back(Chain);
3779 case NVPTXISD::Suld2DArrayI64Clamp:
3780 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3781 Ops.push_back(TexHandle);
3782 Ops.push_back(N->getOperand(2));
3783 Ops.push_back(N->getOperand(3));
3784 Ops.push_back(N->getOperand(4));
3785 Ops.push_back(Chain);
3787 case NVPTXISD::Suld2DArrayV2I8Clamp:
3788 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3789 Ops.push_back(TexHandle);
3790 Ops.push_back(N->getOperand(2));
3791 Ops.push_back(N->getOperand(3));
3792 Ops.push_back(N->getOperand(4));
3793 Ops.push_back(Chain);
3795 case NVPTXISD::Suld2DArrayV2I16Clamp:
3796 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3797 Ops.push_back(TexHandle);
3798 Ops.push_back(N->getOperand(2));
3799 Ops.push_back(N->getOperand(3));
3800 Ops.push_back(N->getOperand(4));
3801 Ops.push_back(Chain);
3803 case NVPTXISD::Suld2DArrayV2I32Clamp:
3804 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3805 Ops.push_back(TexHandle);
3806 Ops.push_back(N->getOperand(2));
3807 Ops.push_back(N->getOperand(3));
3808 Ops.push_back(N->getOperand(4));
3809 Ops.push_back(Chain);
3811 case NVPTXISD::Suld2DArrayV2I64Clamp:
3812 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3813 Ops.push_back(TexHandle);
3814 Ops.push_back(N->getOperand(2));
3815 Ops.push_back(N->getOperand(3));
3816 Ops.push_back(N->getOperand(4));
3817 Ops.push_back(Chain);
3819 case NVPTXISD::Suld2DArrayV4I8Clamp:
3820 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3821 Ops.push_back(TexHandle);
3822 Ops.push_back(N->getOperand(2));
3823 Ops.push_back(N->getOperand(3));
3824 Ops.push_back(N->getOperand(4));
3825 Ops.push_back(Chain);
3827 case NVPTXISD::Suld2DArrayV4I16Clamp:
3828 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3829 Ops.push_back(TexHandle);
3830 Ops.push_back(N->getOperand(2));
3831 Ops.push_back(N->getOperand(3));
3832 Ops.push_back(N->getOperand(4));
3833 Ops.push_back(Chain);
3835 case NVPTXISD::Suld2DArrayV4I32Clamp:
3836 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3837 Ops.push_back(TexHandle);
3838 Ops.push_back(N->getOperand(2));
3839 Ops.push_back(N->getOperand(3));
3840 Ops.push_back(N->getOperand(4));
3841 Ops.push_back(Chain);
3843 case NVPTXISD::Suld3DI8Clamp:
3844 Opc = NVPTX::SULD_3D_I8_CLAMP;
3845 Ops.push_back(TexHandle);
3846 Ops.push_back(N->getOperand(2));
3847 Ops.push_back(N->getOperand(3));
3848 Ops.push_back(N->getOperand(4));
3849 Ops.push_back(Chain);
3851 case NVPTXISD::Suld3DI16Clamp:
3852 Opc = NVPTX::SULD_3D_I16_CLAMP;
3853 Ops.push_back(TexHandle);
3854 Ops.push_back(N->getOperand(2));
3855 Ops.push_back(N->getOperand(3));
3856 Ops.push_back(N->getOperand(4));
3857 Ops.push_back(Chain);
3859 case NVPTXISD::Suld3DI32Clamp:
3860 Opc = NVPTX::SULD_3D_I32_CLAMP;
3861 Ops.push_back(TexHandle);
3862 Ops.push_back(N->getOperand(2));
3863 Ops.push_back(N->getOperand(3));
3864 Ops.push_back(N->getOperand(4));
3865 Ops.push_back(Chain);
3867 case NVPTXISD::Suld3DI64Clamp:
3868 Opc = NVPTX::SULD_3D_I64_CLAMP;
3869 Ops.push_back(TexHandle);
3870 Ops.push_back(N->getOperand(2));
3871 Ops.push_back(N->getOperand(3));
3872 Ops.push_back(N->getOperand(4));
3873 Ops.push_back(Chain);
3875 case NVPTXISD::Suld3DV2I8Clamp:
3876 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3877 Ops.push_back(TexHandle);
3878 Ops.push_back(N->getOperand(2));
3879 Ops.push_back(N->getOperand(3));
3880 Ops.push_back(N->getOperand(4));
3881 Ops.push_back(Chain);
3883 case NVPTXISD::Suld3DV2I16Clamp:
3884 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3885 Ops.push_back(TexHandle);
3886 Ops.push_back(N->getOperand(2));
3887 Ops.push_back(N->getOperand(3));
3888 Ops.push_back(N->getOperand(4));
3889 Ops.push_back(Chain);
3891 case NVPTXISD::Suld3DV2I32Clamp:
3892 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3893 Ops.push_back(TexHandle);
3894 Ops.push_back(N->getOperand(2));
3895 Ops.push_back(N->getOperand(3));
3896 Ops.push_back(N->getOperand(4));
3897 Ops.push_back(Chain);
3899 case NVPTXISD::Suld3DV2I64Clamp:
3900 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3901 Ops.push_back(TexHandle);
3902 Ops.push_back(N->getOperand(2));
3903 Ops.push_back(N->getOperand(3));
3904 Ops.push_back(N->getOperand(4));
3905 Ops.push_back(Chain);
3907 case NVPTXISD::Suld3DV4I8Clamp:
3908 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3909 Ops.push_back(TexHandle);
3910 Ops.push_back(N->getOperand(2));
3911 Ops.push_back(N->getOperand(3));
3912 Ops.push_back(N->getOperand(4));
3913 Ops.push_back(Chain);
3915 case NVPTXISD::Suld3DV4I16Clamp:
3916 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3917 Ops.push_back(TexHandle);
3918 Ops.push_back(N->getOperand(2));
3919 Ops.push_back(N->getOperand(3));
3920 Ops.push_back(N->getOperand(4));
3921 Ops.push_back(Chain);
3923 case NVPTXISD::Suld3DV4I32Clamp:
3924 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3925 Ops.push_back(TexHandle);
3926 Ops.push_back(N->getOperand(2));
3927 Ops.push_back(N->getOperand(3));
3928 Ops.push_back(N->getOperand(4));
3929 Ops.push_back(Chain);
3931 case NVPTXISD::Suld1DI8Trap:
3932 Opc = NVPTX::SULD_1D_I8_TRAP;
3933 Ops.push_back(TexHandle);
3934 Ops.push_back(N->getOperand(2));
3935 Ops.push_back(Chain);
3937 case NVPTXISD::Suld1DI16Trap:
3938 Opc = NVPTX::SULD_1D_I16_TRAP;
3939 Ops.push_back(TexHandle);
3940 Ops.push_back(N->getOperand(2));
3941 Ops.push_back(Chain);
3943 case NVPTXISD::Suld1DI32Trap:
3944 Opc = NVPTX::SULD_1D_I32_TRAP;
3945 Ops.push_back(TexHandle);
3946 Ops.push_back(N->getOperand(2));
3947 Ops.push_back(Chain);
3949 case NVPTXISD::Suld1DI64Trap:
3950 Opc = NVPTX::SULD_1D_I64_TRAP;
3951 Ops.push_back(TexHandle);
3952 Ops.push_back(N->getOperand(2));
3953 Ops.push_back(Chain);
3955 case NVPTXISD::Suld1DV2I8Trap:
3956 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3957 Ops.push_back(TexHandle);
3958 Ops.push_back(N->getOperand(2));
3959 Ops.push_back(Chain);
3961 case NVPTXISD::Suld1DV2I16Trap:
3962 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3963 Ops.push_back(TexHandle);
3964 Ops.push_back(N->getOperand(2));
3965 Ops.push_back(Chain);
3967 case NVPTXISD::Suld1DV2I32Trap:
3968 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3969 Ops.push_back(TexHandle);
3970 Ops.push_back(N->getOperand(2));
3971 Ops.push_back(Chain);
3973 case NVPTXISD::Suld1DV2I64Trap:
3974 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3975 Ops.push_back(TexHandle);
3976 Ops.push_back(N->getOperand(2));
3977 Ops.push_back(Chain);
3979 case NVPTXISD::Suld1DV4I8Trap:
3980 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3981 Ops.push_back(TexHandle);
3982 Ops.push_back(N->getOperand(2));
3983 Ops.push_back(Chain);
3985 case NVPTXISD::Suld1DV4I16Trap:
3986 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3987 Ops.push_back(TexHandle);
3988 Ops.push_back(N->getOperand(2));
3989 Ops.push_back(Chain);
3991 case NVPTXISD::Suld1DV4I32Trap:
3992 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3993 Ops.push_back(TexHandle);
3994 Ops.push_back(N->getOperand(2));
3995 Ops.push_back(Chain);
3997 case NVPTXISD::Suld1DArrayI8Trap:
3998 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3999 Ops.push_back(TexHandle);
4000 Ops.push_back(N->getOperand(2));
4001 Ops.push_back(N->getOperand(3));
4002 Ops.push_back(Chain);
4004 case NVPTXISD::Suld1DArrayI16Trap:
4005 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4006 Ops.push_back(TexHandle);
4007 Ops.push_back(N->getOperand(2));
4008 Ops.push_back(N->getOperand(3));
4009 Ops.push_back(Chain);
4011 case NVPTXISD::Suld1DArrayI32Trap:
4012 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4013 Ops.push_back(TexHandle);
4014 Ops.push_back(N->getOperand(2));
4015 Ops.push_back(N->getOperand(3));
4016 Ops.push_back(Chain);
4018 case NVPTXISD::Suld1DArrayI64Trap:
4019 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4020 Ops.push_back(TexHandle);
4021 Ops.push_back(N->getOperand(2));
4022 Ops.push_back(N->getOperand(3));
4023 Ops.push_back(Chain);
4025 case NVPTXISD::Suld1DArrayV2I8Trap:
4026 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4027 Ops.push_back(TexHandle);
4028 Ops.push_back(N->getOperand(2));
4029 Ops.push_back(N->getOperand(3));
4030 Ops.push_back(Chain);
4032 case NVPTXISD::Suld1DArrayV2I16Trap:
4033 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4034 Ops.push_back(TexHandle);
4035 Ops.push_back(N->getOperand(2));
4036 Ops.push_back(N->getOperand(3));
4037 Ops.push_back(Chain);
4039 case NVPTXISD::Suld1DArrayV2I32Trap:
4040 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4041 Ops.push_back(TexHandle);
4042 Ops.push_back(N->getOperand(2));
4043 Ops.push_back(N->getOperand(3));
4044 Ops.push_back(Chain);
4046 case NVPTXISD::Suld1DArrayV2I64Trap:
4047 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4048 Ops.push_back(TexHandle);
4049 Ops.push_back(N->getOperand(2));
4050 Ops.push_back(N->getOperand(3));
4051 Ops.push_back(Chain);
4053 case NVPTXISD::Suld1DArrayV4I8Trap:
4054 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4055 Ops.push_back(TexHandle);
4056 Ops.push_back(N->getOperand(2));
4057 Ops.push_back(N->getOperand(3));
4058 Ops.push_back(Chain);
4060 case NVPTXISD::Suld1DArrayV4I16Trap:
4061 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4062 Ops.push_back(TexHandle);
4063 Ops.push_back(N->getOperand(2));
4064 Ops.push_back(N->getOperand(3));
4065 Ops.push_back(Chain);
4067 case NVPTXISD::Suld1DArrayV4I32Trap:
4068 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4069 Ops.push_back(TexHandle);
4070 Ops.push_back(N->getOperand(2));
4071 Ops.push_back(N->getOperand(3));
4072 Ops.push_back(Chain);
4074 case NVPTXISD::Suld2DI8Trap:
4075 Opc = NVPTX::SULD_2D_I8_TRAP;
4076 Ops.push_back(TexHandle);
4077 Ops.push_back(N->getOperand(2));
4078 Ops.push_back(N->getOperand(3));
4079 Ops.push_back(Chain);
4081 case NVPTXISD::Suld2DI16Trap:
4082 Opc = NVPTX::SULD_2D_I16_TRAP;
4083 Ops.push_back(TexHandle);
4084 Ops.push_back(N->getOperand(2));
4085 Ops.push_back(N->getOperand(3));
4086 Ops.push_back(Chain);
4088 case NVPTXISD::Suld2DI32Trap:
4089 Opc = NVPTX::SULD_2D_I32_TRAP;
4090 Ops.push_back(TexHandle);
4091 Ops.push_back(N->getOperand(2));
4092 Ops.push_back(N->getOperand(3));
4093 Ops.push_back(Chain);
4095 case NVPTXISD::Suld2DI64Trap:
4096 Opc = NVPTX::SULD_2D_I64_TRAP;
4097 Ops.push_back(TexHandle);
4098 Ops.push_back(N->getOperand(2));
4099 Ops.push_back(N->getOperand(3));
4100 Ops.push_back(Chain);
4102 case NVPTXISD::Suld2DV2I8Trap:
4103 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4104 Ops.push_back(TexHandle);
4105 Ops.push_back(N->getOperand(2));
4106 Ops.push_back(N->getOperand(3));
4107 Ops.push_back(Chain);
4109 case NVPTXISD::Suld2DV2I16Trap:
4110 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4111 Ops.push_back(TexHandle);
4112 Ops.push_back(N->getOperand(2));
4113 Ops.push_back(N->getOperand(3));
4114 Ops.push_back(Chain);
4116 case NVPTXISD::Suld2DV2I32Trap:
4117 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4118 Ops.push_back(TexHandle);
4119 Ops.push_back(N->getOperand(2));
4120 Ops.push_back(N->getOperand(3));
4121 Ops.push_back(Chain);
4123 case NVPTXISD::Suld2DV2I64Trap:
4124 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4125 Ops.push_back(TexHandle);
4126 Ops.push_back(N->getOperand(2));
4127 Ops.push_back(N->getOperand(3));
4128 Ops.push_back(Chain);
4130 case NVPTXISD::Suld2DV4I8Trap:
4131 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4132 Ops.push_back(TexHandle);
4133 Ops.push_back(N->getOperand(2));
4134 Ops.push_back(N->getOperand(3));
4135 Ops.push_back(Chain);
4137 case NVPTXISD::Suld2DV4I16Trap:
4138 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4139 Ops.push_back(TexHandle);
4140 Ops.push_back(N->getOperand(2));
4141 Ops.push_back(N->getOperand(3));
4142 Ops.push_back(Chain);
4144 case NVPTXISD::Suld2DV4I32Trap:
4145 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4146 Ops.push_back(TexHandle);
4147 Ops.push_back(N->getOperand(2));
4148 Ops.push_back(N->getOperand(3));
4149 Ops.push_back(Chain);
4151 case NVPTXISD::Suld2DArrayI8Trap:
4152 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4153 Ops.push_back(TexHandle);
4154 Ops.push_back(N->getOperand(2));
4155 Ops.push_back(N->getOperand(3));
4156 Ops.push_back(N->getOperand(4));
4157 Ops.push_back(Chain);
4159 case NVPTXISD::Suld2DArrayI16Trap:
4160 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4161 Ops.push_back(TexHandle);
4162 Ops.push_back(N->getOperand(2));
4163 Ops.push_back(N->getOperand(3));
4164 Ops.push_back(N->getOperand(4));
4165 Ops.push_back(Chain);
4167 case NVPTXISD::Suld2DArrayI32Trap:
4168 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4169 Ops.push_back(TexHandle);
4170 Ops.push_back(N->getOperand(2));
4171 Ops.push_back(N->getOperand(3));
4172 Ops.push_back(N->getOperand(4));
4173 Ops.push_back(Chain);
4175 case NVPTXISD::Suld2DArrayI64Trap:
4176 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4177 Ops.push_back(TexHandle);
4178 Ops.push_back(N->getOperand(2));
4179 Ops.push_back(N->getOperand(3));
4180 Ops.push_back(N->getOperand(4));
4181 Ops.push_back(Chain);
4183 case NVPTXISD::Suld2DArrayV2I8Trap:
4184 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4185 Ops.push_back(TexHandle);
4186 Ops.push_back(N->getOperand(2));
4187 Ops.push_back(N->getOperand(3));
4188 Ops.push_back(N->getOperand(4));
4189 Ops.push_back(Chain);
4191 case NVPTXISD::Suld2DArrayV2I16Trap:
4192 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4193 Ops.push_back(TexHandle);
4194 Ops.push_back(N->getOperand(2));
4195 Ops.push_back(N->getOperand(3));
4196 Ops.push_back(N->getOperand(4));
4197 Ops.push_back(Chain);
4199 case NVPTXISD::Suld2DArrayV2I32Trap:
4200 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4201 Ops.push_back(TexHandle);
4202 Ops.push_back(N->getOperand(2));
4203 Ops.push_back(N->getOperand(3));
4204 Ops.push_back(N->getOperand(4));
4205 Ops.push_back(Chain);
4207 case NVPTXISD::Suld2DArrayV2I64Trap:
4208 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4209 Ops.push_back(TexHandle);
4210 Ops.push_back(N->getOperand(2));
4211 Ops.push_back(N->getOperand(3));
4212 Ops.push_back(N->getOperand(4));
4213 Ops.push_back(Chain);
4215 case NVPTXISD::Suld2DArrayV4I8Trap:
4216 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4217 Ops.push_back(TexHandle);
4218 Ops.push_back(N->getOperand(2));
4219 Ops.push_back(N->getOperand(3));
4220 Ops.push_back(N->getOperand(4));
4221 Ops.push_back(Chain);
4223 case NVPTXISD::Suld2DArrayV4I16Trap:
4224 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4225 Ops.push_back(TexHandle);
4226 Ops.push_back(N->getOperand(2));
4227 Ops.push_back(N->getOperand(3));
4228 Ops.push_back(N->getOperand(4));
4229 Ops.push_back(Chain);
4231 case NVPTXISD::Suld2DArrayV4I32Trap:
4232 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4233 Ops.push_back(TexHandle);
4234 Ops.push_back(N->getOperand(2));
4235 Ops.push_back(N->getOperand(3));
4236 Ops.push_back(N->getOperand(4));
4237 Ops.push_back(Chain);
4239 case NVPTXISD::Suld3DI8Trap:
4240 Opc = NVPTX::SULD_3D_I8_TRAP;
4241 Ops.push_back(TexHandle);
4242 Ops.push_back(N->getOperand(2));
4243 Ops.push_back(N->getOperand(3));
4244 Ops.push_back(N->getOperand(4));
4245 Ops.push_back(Chain);
4247 case NVPTXISD::Suld3DI16Trap:
4248 Opc = NVPTX::SULD_3D_I16_TRAP;
4249 Ops.push_back(TexHandle);
4250 Ops.push_back(N->getOperand(2));
4251 Ops.push_back(N->getOperand(3));
4252 Ops.push_back(N->getOperand(4));
4253 Ops.push_back(Chain);
4255 case NVPTXISD::Suld3DI32Trap:
4256 Opc = NVPTX::SULD_3D_I32_TRAP;
4257 Ops.push_back(TexHandle);
4258 Ops.push_back(N->getOperand(2));
4259 Ops.push_back(N->getOperand(3));
4260 Ops.push_back(N->getOperand(4));
4261 Ops.push_back(Chain);
4263 case NVPTXISD::Suld3DI64Trap:
4264 Opc = NVPTX::SULD_3D_I64_TRAP;
4265 Ops.push_back(TexHandle);
4266 Ops.push_back(N->getOperand(2));
4267 Ops.push_back(N->getOperand(3));
4268 Ops.push_back(N->getOperand(4));
4269 Ops.push_back(Chain);
4271 case NVPTXISD::Suld3DV2I8Trap:
4272 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4273 Ops.push_back(TexHandle);
4274 Ops.push_back(N->getOperand(2));
4275 Ops.push_back(N->getOperand(3));
4276 Ops.push_back(N->getOperand(4));
4277 Ops.push_back(Chain);
4279 case NVPTXISD::Suld3DV2I16Trap:
4280 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4281 Ops.push_back(TexHandle);
4282 Ops.push_back(N->getOperand(2));
4283 Ops.push_back(N->getOperand(3));
4284 Ops.push_back(N->getOperand(4));
4285 Ops.push_back(Chain);
4287 case NVPTXISD::Suld3DV2I32Trap:
4288 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4289 Ops.push_back(TexHandle);
4290 Ops.push_back(N->getOperand(2));
4291 Ops.push_back(N->getOperand(3));
4292 Ops.push_back(N->getOperand(4));
4293 Ops.push_back(Chain);
4295 case NVPTXISD::Suld3DV2I64Trap:
4296 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4297 Ops.push_back(TexHandle);
4298 Ops.push_back(N->getOperand(2));
4299 Ops.push_back(N->getOperand(3));
4300 Ops.push_back(N->getOperand(4));
4301 Ops.push_back(Chain);
4303 case NVPTXISD::Suld3DV4I8Trap:
4304 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4305 Ops.push_back(TexHandle);
4306 Ops.push_back(N->getOperand(2));
4307 Ops.push_back(N->getOperand(3));
4308 Ops.push_back(N->getOperand(4));
4309 Ops.push_back(Chain);
4311 case NVPTXISD::Suld3DV4I16Trap:
4312 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4313 Ops.push_back(TexHandle);
4314 Ops.push_back(N->getOperand(2));
4315 Ops.push_back(N->getOperand(3));
4316 Ops.push_back(N->getOperand(4));
4317 Ops.push_back(Chain);
4319 case NVPTXISD::Suld3DV4I32Trap:
4320 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4321 Ops.push_back(TexHandle);
4322 Ops.push_back(N->getOperand(2));
4323 Ops.push_back(N->getOperand(3));
4324 Ops.push_back(N->getOperand(4));
4325 Ops.push_back(Chain);
4327 case NVPTXISD::Suld1DI8Zero:
4328 Opc = NVPTX::SULD_1D_I8_ZERO;
4329 Ops.push_back(TexHandle);
4330 Ops.push_back(N->getOperand(2));
4331 Ops.push_back(Chain);
4333 case NVPTXISD::Suld1DI16Zero:
4334 Opc = NVPTX::SULD_1D_I16_ZERO;
4335 Ops.push_back(TexHandle);
4336 Ops.push_back(N->getOperand(2));
4337 Ops.push_back(Chain);
4339 case NVPTXISD::Suld1DI32Zero:
4340 Opc = NVPTX::SULD_1D_I32_ZERO;
4341 Ops.push_back(TexHandle);
4342 Ops.push_back(N->getOperand(2));
4343 Ops.push_back(Chain);
4345 case NVPTXISD::Suld1DI64Zero:
4346 Opc = NVPTX::SULD_1D_I64_ZERO;
4347 Ops.push_back(TexHandle);
4348 Ops.push_back(N->getOperand(2));
4349 Ops.push_back(Chain);
4351 case NVPTXISD::Suld1DV2I8Zero:
4352 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4353 Ops.push_back(TexHandle);
4354 Ops.push_back(N->getOperand(2));
4355 Ops.push_back(Chain);
4357 case NVPTXISD::Suld1DV2I16Zero:
4358 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4359 Ops.push_back(TexHandle);
4360 Ops.push_back(N->getOperand(2));
4361 Ops.push_back(Chain);
4363 case NVPTXISD::Suld1DV2I32Zero:
4364 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4365 Ops.push_back(TexHandle);
4366 Ops.push_back(N->getOperand(2));
4367 Ops.push_back(Chain);
4369 case NVPTXISD::Suld1DV2I64Zero:
4370 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4371 Ops.push_back(TexHandle);
4372 Ops.push_back(N->getOperand(2));
4373 Ops.push_back(Chain);
4375 case NVPTXISD::Suld1DV4I8Zero:
4376 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4377 Ops.push_back(TexHandle);
4378 Ops.push_back(N->getOperand(2));
4379 Ops.push_back(Chain);
4381 case NVPTXISD::Suld1DV4I16Zero:
4382 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4383 Ops.push_back(TexHandle);
4384 Ops.push_back(N->getOperand(2));
4385 Ops.push_back(Chain);
4387 case NVPTXISD::Suld1DV4I32Zero:
4388 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4389 Ops.push_back(TexHandle);
4390 Ops.push_back(N->getOperand(2));
4391 Ops.push_back(Chain);
4393 case NVPTXISD::Suld1DArrayI8Zero:
4394 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4395 Ops.push_back(TexHandle);
4396 Ops.push_back(N->getOperand(2));
4397 Ops.push_back(N->getOperand(3));
4398 Ops.push_back(Chain);
4400 case NVPTXISD::Suld1DArrayI16Zero:
4401 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4402 Ops.push_back(TexHandle);
4403 Ops.push_back(N->getOperand(2));
4404 Ops.push_back(N->getOperand(3));
4405 Ops.push_back(Chain);
4407 case NVPTXISD::Suld1DArrayI32Zero:
4408 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4409 Ops.push_back(TexHandle);
4410 Ops.push_back(N->getOperand(2));
4411 Ops.push_back(N->getOperand(3));
4412 Ops.push_back(Chain);
4414 case NVPTXISD::Suld1DArrayI64Zero:
4415 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4416 Ops.push_back(TexHandle);
4417 Ops.push_back(N->getOperand(2));
4418 Ops.push_back(N->getOperand(3));
4419 Ops.push_back(Chain);
4421 case NVPTXISD::Suld1DArrayV2I8Zero:
4422 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4423 Ops.push_back(TexHandle);
4424 Ops.push_back(N->getOperand(2));
4425 Ops.push_back(N->getOperand(3));
4426 Ops.push_back(Chain);
4428 case NVPTXISD::Suld1DArrayV2I16Zero:
4429 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4430 Ops.push_back(TexHandle);
4431 Ops.push_back(N->getOperand(2));
4432 Ops.push_back(N->getOperand(3));
4433 Ops.push_back(Chain);
4435 case NVPTXISD::Suld1DArrayV2I32Zero:
4436 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4437 Ops.push_back(TexHandle);
4438 Ops.push_back(N->getOperand(2));
4439 Ops.push_back(N->getOperand(3));
4440 Ops.push_back(Chain);
4442 case NVPTXISD::Suld1DArrayV2I64Zero:
4443 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4444 Ops.push_back(TexHandle);
4445 Ops.push_back(N->getOperand(2));
4446 Ops.push_back(N->getOperand(3));
4447 Ops.push_back(Chain);
4449 case NVPTXISD::Suld1DArrayV4I8Zero:
4450 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4451 Ops.push_back(TexHandle);
4452 Ops.push_back(N->getOperand(2));
4453 Ops.push_back(N->getOperand(3));
4454 Ops.push_back(Chain);
4456 case NVPTXISD::Suld1DArrayV4I16Zero:
4457 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4458 Ops.push_back(TexHandle);
4459 Ops.push_back(N->getOperand(2));
4460 Ops.push_back(N->getOperand(3));
4461 Ops.push_back(Chain);
4463 case NVPTXISD::Suld1DArrayV4I32Zero:
4464 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4465 Ops.push_back(TexHandle);
4466 Ops.push_back(N->getOperand(2));
4467 Ops.push_back(N->getOperand(3));
4468 Ops.push_back(Chain);
4470 case NVPTXISD::Suld2DI8Zero:
4471 Opc = NVPTX::SULD_2D_I8_ZERO;
4472 Ops.push_back(TexHandle);
4473 Ops.push_back(N->getOperand(2));
4474 Ops.push_back(N->getOperand(3));
4475 Ops.push_back(Chain);
4477 case NVPTXISD::Suld2DI16Zero:
4478 Opc = NVPTX::SULD_2D_I16_ZERO;
4479 Ops.push_back(TexHandle);
4480 Ops.push_back(N->getOperand(2));
4481 Ops.push_back(N->getOperand(3));
4482 Ops.push_back(Chain);
4484 case NVPTXISD::Suld2DI32Zero:
4485 Opc = NVPTX::SULD_2D_I32_ZERO;
4486 Ops.push_back(TexHandle);
4487 Ops.push_back(N->getOperand(2));
4488 Ops.push_back(N->getOperand(3));
4489 Ops.push_back(Chain);
4491 case NVPTXISD::Suld2DI64Zero:
4492 Opc = NVPTX::SULD_2D_I64_ZERO;
4493 Ops.push_back(TexHandle);
4494 Ops.push_back(N->getOperand(2));
4495 Ops.push_back(N->getOperand(3));
4496 Ops.push_back(Chain);
4498 case NVPTXISD::Suld2DV2I8Zero:
4499 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4500 Ops.push_back(TexHandle);
4501 Ops.push_back(N->getOperand(2));
4502 Ops.push_back(N->getOperand(3));
4503 Ops.push_back(Chain);
4505 case NVPTXISD::Suld2DV2I16Zero:
4506 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4507 Ops.push_back(TexHandle);
4508 Ops.push_back(N->getOperand(2));
4509 Ops.push_back(N->getOperand(3));
4510 Ops.push_back(Chain);
4512 case NVPTXISD::Suld2DV2I32Zero:
4513 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4514 Ops.push_back(TexHandle);
4515 Ops.push_back(N->getOperand(2));
4516 Ops.push_back(N->getOperand(3));
4517 Ops.push_back(Chain);
4519 case NVPTXISD::Suld2DV2I64Zero:
4520 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4521 Ops.push_back(TexHandle);
4522 Ops.push_back(N->getOperand(2));
4523 Ops.push_back(N->getOperand(3));
4524 Ops.push_back(Chain);
4526 case NVPTXISD::Suld2DV4I8Zero:
4527 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4528 Ops.push_back(TexHandle);
4529 Ops.push_back(N->getOperand(2));
4530 Ops.push_back(N->getOperand(3));
4531 Ops.push_back(Chain);
4533 case NVPTXISD::Suld2DV4I16Zero:
4534 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4535 Ops.push_back(TexHandle);
4536 Ops.push_back(N->getOperand(2));
4537 Ops.push_back(N->getOperand(3));
4538 Ops.push_back(Chain);
4540 case NVPTXISD::Suld2DV4I32Zero:
4541 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4542 Ops.push_back(TexHandle);
4543 Ops.push_back(N->getOperand(2));
4544 Ops.push_back(N->getOperand(3));
4545 Ops.push_back(Chain);
4547 case NVPTXISD::Suld2DArrayI8Zero:
4548 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4549 Ops.push_back(TexHandle);
4550 Ops.push_back(N->getOperand(2));
4551 Ops.push_back(N->getOperand(3));
4552 Ops.push_back(N->getOperand(4));
4553 Ops.push_back(Chain);
4555 case NVPTXISD::Suld2DArrayI16Zero:
4556 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4557 Ops.push_back(TexHandle);
4558 Ops.push_back(N->getOperand(2));
4559 Ops.push_back(N->getOperand(3));
4560 Ops.push_back(N->getOperand(4));
4561 Ops.push_back(Chain);
4563 case NVPTXISD::Suld2DArrayI32Zero:
4564 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4565 Ops.push_back(TexHandle);
4566 Ops.push_back(N->getOperand(2));
4567 Ops.push_back(N->getOperand(3));
4568 Ops.push_back(N->getOperand(4));
4569 Ops.push_back(Chain);
4571 case NVPTXISD::Suld2DArrayI64Zero:
4572 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4573 Ops.push_back(TexHandle);
4574 Ops.push_back(N->getOperand(2));
4575 Ops.push_back(N->getOperand(3));
4576 Ops.push_back(N->getOperand(4));
4577 Ops.push_back(Chain);
4579 case NVPTXISD::Suld2DArrayV2I8Zero:
4580 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4581 Ops.push_back(TexHandle);
4582 Ops.push_back(N->getOperand(2));
4583 Ops.push_back(N->getOperand(3));
4584 Ops.push_back(N->getOperand(4));
4585 Ops.push_back(Chain);
4587 case NVPTXISD::Suld2DArrayV2I16Zero:
4588 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4589 Ops.push_back(TexHandle);
4590 Ops.push_back(N->getOperand(2));
4591 Ops.push_back(N->getOperand(3));
4592 Ops.push_back(N->getOperand(4));
4593 Ops.push_back(Chain);
4595 case NVPTXISD::Suld2DArrayV2I32Zero:
4596 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4597 Ops.push_back(TexHandle);
4598 Ops.push_back(N->getOperand(2));
4599 Ops.push_back(N->getOperand(3));
4600 Ops.push_back(N->getOperand(4));
4601 Ops.push_back(Chain);
4603 case NVPTXISD::Suld2DArrayV2I64Zero:
4604 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4605 Ops.push_back(TexHandle);
4606 Ops.push_back(N->getOperand(2));
4607 Ops.push_back(N->getOperand(3));
4608 Ops.push_back(N->getOperand(4));
4609 Ops.push_back(Chain);
4611 case NVPTXISD::Suld2DArrayV4I8Zero:
4612 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4613 Ops.push_back(TexHandle);
4614 Ops.push_back(N->getOperand(2));
4615 Ops.push_back(N->getOperand(3));
4616 Ops.push_back(N->getOperand(4));
4617 Ops.push_back(Chain);
4619 case NVPTXISD::Suld2DArrayV4I16Zero:
4620 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4621 Ops.push_back(TexHandle);
4622 Ops.push_back(N->getOperand(2));
4623 Ops.push_back(N->getOperand(3));
4624 Ops.push_back(N->getOperand(4));
4625 Ops.push_back(Chain);
4627 case NVPTXISD::Suld2DArrayV4I32Zero:
4628 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4629 Ops.push_back(TexHandle);
4630 Ops.push_back(N->getOperand(2));
4631 Ops.push_back(N->getOperand(3));
4632 Ops.push_back(N->getOperand(4));
4633 Ops.push_back(Chain);
4635 case NVPTXISD::Suld3DI8Zero:
4636 Opc = NVPTX::SULD_3D_I8_ZERO;
4637 Ops.push_back(TexHandle);
4638 Ops.push_back(N->getOperand(2));
4639 Ops.push_back(N->getOperand(3));
4640 Ops.push_back(N->getOperand(4));
4641 Ops.push_back(Chain);
4643 case NVPTXISD::Suld3DI16Zero:
4644 Opc = NVPTX::SULD_3D_I16_ZERO;
4645 Ops.push_back(TexHandle);
4646 Ops.push_back(N->getOperand(2));
4647 Ops.push_back(N->getOperand(3));
4648 Ops.push_back(N->getOperand(4));
4649 Ops.push_back(Chain);
4651 case NVPTXISD::Suld3DI32Zero:
4652 Opc = NVPTX::SULD_3D_I32_ZERO;
4653 Ops.push_back(TexHandle);
4654 Ops.push_back(N->getOperand(2));
4655 Ops.push_back(N->getOperand(3));
4656 Ops.push_back(N->getOperand(4));
4657 Ops.push_back(Chain);
4659 case NVPTXISD::Suld3DI64Zero:
4660 Opc = NVPTX::SULD_3D_I64_ZERO;
4661 Ops.push_back(TexHandle);
4662 Ops.push_back(N->getOperand(2));
4663 Ops.push_back(N->getOperand(3));
4664 Ops.push_back(N->getOperand(4));
4665 Ops.push_back(Chain);
4667 case NVPTXISD::Suld3DV2I8Zero:
4668 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4669 Ops.push_back(TexHandle);
4670 Ops.push_back(N->getOperand(2));
4671 Ops.push_back(N->getOperand(3));
4672 Ops.push_back(N->getOperand(4));
4673 Ops.push_back(Chain);
4675 case NVPTXISD::Suld3DV2I16Zero:
4676 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4677 Ops.push_back(TexHandle);
4678 Ops.push_back(N->getOperand(2));
4679 Ops.push_back(N->getOperand(3));
4680 Ops.push_back(N->getOperand(4));
4681 Ops.push_back(Chain);
4683 case NVPTXISD::Suld3DV2I32Zero:
4684 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4685 Ops.push_back(TexHandle);
4686 Ops.push_back(N->getOperand(2));
4687 Ops.push_back(N->getOperand(3));
4688 Ops.push_back(N->getOperand(4));
4689 Ops.push_back(Chain);
4691 case NVPTXISD::Suld3DV2I64Zero:
4692 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4693 Ops.push_back(TexHandle);
4694 Ops.push_back(N->getOperand(2));
4695 Ops.push_back(N->getOperand(3));
4696 Ops.push_back(N->getOperand(4));
4697 Ops.push_back(Chain);
4699 case NVPTXISD::Suld3DV4I8Zero:
4700 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4701 Ops.push_back(TexHandle);
4702 Ops.push_back(N->getOperand(2));
4703 Ops.push_back(N->getOperand(3));
4704 Ops.push_back(N->getOperand(4));
4705 Ops.push_back(Chain);
4707 case NVPTXISD::Suld3DV4I16Zero:
4708 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4709 Ops.push_back(TexHandle);
4710 Ops.push_back(N->getOperand(2));
4711 Ops.push_back(N->getOperand(3));
4712 Ops.push_back(N->getOperand(4));
4713 Ops.push_back(Chain);
4715 case NVPTXISD::Suld3DV4I32Zero:
4716 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4717 Ops.push_back(TexHandle);
4718 Ops.push_back(N->getOperand(2));
4719 Ops.push_back(N->getOperand(3));
4720 Ops.push_back(N->getOperand(4));
4721 Ops.push_back(Chain);
4724 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4729 /// SelectBFE - Look for instruction sequences that can be made more efficient
4730 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// Patterns examined, keyed on the opcode of N:
///   - ISD::AND whose LHS (after canonicalization) is an SRL/SRA
///     (shift, then mask),
///   - ISD::SRL / ISD::SRA whose LHS is an ISD::AND (mask, then shift),
///   - ISD::SRL / ISD::SRA whose LHS is an ISD::SHL (shift left, then right).
/// The final visible statement returns a machine node built from Opc and Ops
/// with the value-type list of N; Opc is one of the BFE_* opcodes below.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 4731 -> 4733 and 4747 -> 4749), so some statements are not visible
/// here. That includes the declarations of DL, Len, Start, Val, Opc and Ops,
/// and the bodies of several of the conditions below. Confirm the control flow
/// against the complete source before relying on these comments.
4731 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// The two operands of N; for an AND they may be swapped below.
4733 SDValue LHS = N->getOperand(0);
4734 SDValue RHS = N->getOperand(1);
// Presumably selects the signed BFE opcodes further down; no assignment other
// than this initialization is visible in this listing -- TODO confirm.
4738 bool IsSigned = false;
4740 if (N->getOpcode() == ISD::AND) {
4741 // Canonicalize the operands
4742 // We want 'and %val, %mask'
4743 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4744 std::swap(LHS, RHS);
4747 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4749 // We need a constant mask on the RHS of the AND
// NOTE(review): Mask comes from dyn_cast and is dereferenced below; the null
// check that the comment above describes is not visible here -- confirm.
4753 // Extract the mask bits
4754 uint64_t MaskVal = Mask->getZExtValue();
4755 if (!isMask_64(MaskVal)) {
4756 // We *could* handle shifted masks here, but doing so would require an
4757 // 'and' operation to fix up the low-order bits so we would trade
4758 // shr+and for bfe+and, which has the same throughput
4762 // How many bits are in our mask?
4763 uint64_t NumBits = countTrailingOnes(MaskVal);
4764 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4766 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4767 // We have a 'srl/and' pair, extract the effective start bit and length
4768 Val = LHS.getNode()->getOperand(0);
4769 Start = LHS.getNode()->getOperand(1);
4770 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4772 uint64_t StartVal = StartConst->getZExtValue();
4773 // How many "good" bits do we have left? "good" is defined here as bits
4774 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of the shift-amount operand
// (Start), not of the shifted value (Val) -- confirm this is intended.
4775 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4776 if (NumBits > GoodBits) {
4777 // Do not handle the case where bits have been shifted in. In theory
4778 // we could handle this, but the cost is likely higher than just
4779 // emitting the srl/and pair.
// Replace the shift-amount operand with an i32 target constant of equal value.
4782 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4784 // Do not handle the case where the shift amount (can be zero if no srl
4785 // was found) is not constant. We could handle this case, but it would
4786 // require run-time logic that would be more expensive than just
4787 // emitting the srl/and pair.
4791 // Do not handle the case where the LHS of the and is not a shift. While
4792 // it would be trivial to handle this case, it would just transform
4793 // 'and' -> 'bfe', but 'and' has higher-throughput.
4796 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4797 if (LHS->getOpcode() == ISD::AND) {
4798 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4800 // Shift amount must be constant
4804 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4806 SDValue AndLHS = LHS->getOperand(0);
4807 SDValue AndRHS = LHS->getOperand(1);
4809 // Canonicalize the AND to have the mask on the RHS
4810 if (isa<ConstantSDNode>(AndLHS)) {
4811 std::swap(AndLHS, AndRHS);
4814 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4816 // Mask must be constant
4820 uint64_t MaskVal = MaskCnst->getZExtValue();
// NumZeros and NumBits are used below; their declarations are not visible in
// this listing.
4823 if (isMask_64(MaskVal)) {
4825 // The number of bits in the result bitfield will be the number of
4826 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): ShiftAmt is uint64_t, so this subtraction wraps if ShiftAmt
// exceeds the trailing-ones count; no guard for that is visible here.
4827 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4828 } else if (isShiftedMask_64(MaskVal)) {
4829 NumZeros = countTrailingZeros(MaskVal);
4830 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4831 // The number of bits in the result bitfield will be the number of
4832 // trailing zeros plus the number of set bits in the mask minus the
4833 // number of bits we shift off
4834 NumBits = NumZeros + NumOnes - ShiftAmt;
4836 // This is not a mask we can handle
4840 if (ShiftAmt < NumZeros) {
4841 // Handling this case would require extra logic that would make this
4842 // transformation non-profitable
// The extracted field starts at the shift amount and is NumBits wide.
4847 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4848 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4849 } else if (LHS->getOpcode() == ISD::SHL) {
4850 // Here, we have a pattern like:
4852 // (sra (shl val, NN), MM)
4854 // (srl (shl val, NN), MM)
4856 // If MM >= NN, we can efficiently optimize this with bfe
4857 Val = LHS->getOperand(0);
4859 SDValue ShlRHS = LHS->getOperand(1);
4860 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4862 // Shift amount must be constant
4865 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4867 SDValue ShrRHS = RHS;
4868 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4870 // Shift amount must be constant
4873 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4875 // To avoid extra codegen and be profitable, we need Outer >= Inner
4876 if (OuterShiftAmt < InnerShiftAmt) {
4880 // If the outer shift is more than the type size, we have no bitfield to
4881 // extract (since we also check that the inner shift is <= the outer shift
4882 // then this also implies that the inner shift is < the type size)
4883 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// The next two expressions build i32 target constants for (outer - inner) and
// (type size - outer). The left-hand sides of these statements are not visible
// in this listing; presumably Start and Len respectively -- TODO confirm.
4888 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4890 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4891 OuterShiftAmt, DL, MVT::i32);
4893 if (N->getOpcode() == ISD::SRA) {
4894 // If we have an arithmetic right shift, we need to use the signed bfe
4909 // For the BFE operations we form here from "and" and "srl", always use the
4910 // unsigned variants.
// NOTE(review): both signed (BFE_S*) and unsigned (BFE_U*) opcodes are
// assigned below. The condition choosing between them is not visible in this
// listing (presumably IsSigned), so the comment above may be out of date.
4911 if (Val.getValueType() == MVT::i32) {
4913 Opc = NVPTX::BFE_S32rii;
4915 Opc = NVPTX::BFE_U32rii;
4917 } else if (Val.getValueType() == MVT::i64) {
4919 Opc = NVPTX::BFE_S64rii;
4921 Opc = NVPTX::BFE_U64rii;
4924 // We cannot handle this type
// Build the selected BFE machine node with the result types of N.
4932 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4935 // SelectDirectAddr - Match a direct address for DAG.
4936 // A direct address could be a globaladdress or externalsymbol.
//
// N is the candidate node, taken by value; Address is an output reference.
// Three node kinds are examined: TargetGlobalAddress / TargetExternalSymbol,
// NVPTXISD::Wrapper, and an INTRINSIC_WO_CHAIN for nvvm_ptr_gen_to_param.
//
// NOTE(review): the embedded line numbers have gaps (e.g. 4940 -> 4944), so
// the body of the first condition, the return statements of the first two
// conditions and the fall-through return are not visible in this listing.
4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4938 // Return true if TGA or ES.
4939 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4940 N.getOpcode() == ISD::TargetExternalSymbol) {
// A Wrapper node: the address is the wrapped operand.
4944 if (N.getOpcode() == NVPTXISD::Wrapper) {
4945 Address = N.getOperand(0);
// An intrinsic without chain: operand 0 holds the intrinsic ID. For
// nvvm_ptr_gen_to_param applied to a MoveParam node, recurse on the operand
// of that MoveParam.
4948 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4949 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4950 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4951 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4952 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match an address of the form (add base, constant) where
// base is accepted by SelectDirectAddr.
// On a match, Base receives the address produced by SelectDirectAddr and
// Offset is set from the constant operand via getTargetConstant.
//
// NOTE(review): the listing is cut after line 4964, so the remaining
// getTargetConstant argument(s) and all return statements are not visible;
// presumably the constant is created with type mvt -- TODO confirm.
4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4959 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4960 if (Addr.getOpcode() == ISD::ADD) {
// Only a constant second operand is considered as the offset.
4961 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4962 SDValue base = Addr.getOperand(0);
4963 if (SelectDirectAddr(base, Base)) {
4964 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRsi - Forwards to SelectADDRsi_imp with MVT::i32 as the type
// argument; all other arguments are passed through unchanged.
4974 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4975 SDValue &Base, SDValue &Offset) {
4976 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - Forwards to SelectADDRsi_imp with MVT::i64 as the type
// argument; all other arguments are passed through unchanged.
4980 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4981 SDValue &Base, SDValue &Offset) {
4982 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match an address as a Base plus a constant Offset.
// Visible cases:
//   - a bare frame index: Base is the target frame index, Offset is zero;
//   - a TargetExternalSymbol / TargetGlobalAddress: rejected (returns false);
//   - (add x, constant): Base is x, or the target frame index when x is a
//     frame index; Offset is built from the constant.
// Base and Offset are outputs; mvt is the type used for the frame index and
// the zero offset.
//
// NOTE(review): the embedded line numbers have gaps (e.g. 4990 -> 4993 and
// 4998 -> 5001), so the body of the SelectDirectAddr condition, the line
// between 5005 and 5007 (presumably an else), the rest of the final
// getTargetConstant call and the remaining return statements are not visible.
4986 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4987 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4988 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4989 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4990 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
4993 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4994 Addr.getOpcode() == ISD::TargetGlobalAddress)
4995 return false; // direct calls.
4997 if (Addr.getOpcode() == ISD::ADD) {
// NOTE(review): Addr is passed as the output argument here, so the callee may
// overwrite Addr itself (Addr is a by-value parameter of this function).
4998 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5001 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5002 if (FrameIndexSDNode *FIN =
5003 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5004 // Constant offset from frame ref.
5005 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5007 Base = Addr.getOperand(0);
5008 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRri - Forwards to SelectADDRri_imp with MVT::i32 as the type
// argument; all other arguments are passed through unchanged.
5017 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5018 SDValue &Base, SDValue &Offset) {
5019 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - Forwards to SelectADDRri_imp with MVT::i64 as the type
// argument; all other arguments are passed through unchanged.
5023 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5024 SDValue &Base, SDValue &Offset) {
5025 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Check the address space of the IR value attached
// to the memory operand of a memory node.
// If N is a MemSDNode, the IR Value of its memory operand is fetched; when
// that value has pointer type, the result is whether its address space
// equals spN.
//
// NOTE(review): the embedded line numbers have gaps (5032 -> 5034 and
// 5034 -> 5038), so the statement guarded by the pseudo-value check and the
// other return paths are not visible. Src is dereferenced at line 5038 and no
// null guard is visible in this listing -- confirm one exists.
5028 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5029 unsigned int spN) const {
5030 const Value *Src = nullptr;
5031 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
// Special case: address space 0 requested and the memory operand carries a
// pseudo source value (the guarded statement is not visible here).
5032 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5034 Src = mN->getMemOperand()->getValue();
5038 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5039 return (PT->getAddressSpace() == spN);
5043 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5044 /// inline asm expressions.
5045 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5046 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5048 switch (ConstraintID) {
5051 case InlineAsm::Constraint_m: // memory
5052 if (SelectDirectAddr(Op, Op0)) {
5053 OutOps.push_back(Op0);
5054 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5057 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5058 OutOps.push_back(Op0);
5059 OutOps.push_back(Op1);