1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/IR/GlobalValue.h"
17 #include "llvm/IR/Instructions.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Target/TargetIntrinsicInfo.h"
// NOTE(review): the embedded line numbers in this listing skip (e.g. 31 -> 35),
// so parts of the option declarations below (declared types, trailing
// arguments) are not visible here.

// Debug category name used by this file.
26 #define DEBUG_TYPE "nvptx-isel"
// Hidden integer option "nvptx-prec-divf32"; read by getDivF32Level().
28 static cl::opt<int> UsePrecDivF32(
29 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
30 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
31 " IEEE Compliant F32 div.rnd if available."),
// Hidden option "nvptx-prec-sqrtf32"; read by usePrecSqrtF32().
35 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
36 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Hidden option "nvptx-f32ftz"; read by useF32FTZ().
40 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
41 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
45 /// createNVPTXISelDag - This pass converts a legalized DAG into a
46 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// Allocates a new NVPTXDAGToDAGISel with `new`, forwarding \p TM and
/// \p OptLevel to its constructor, and returns the raw pointer.
47 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
48 llvm::CodeGenOpt::Level OptLevel) {
49 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Constructs the selector: forwards \p tm and \p OptLevel to the
/// SelectionDAGISel base class and keeps \p tm in the TM member.
52 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
53 CodeGenOpt::Level OptLevel)
54 : SelectionDAGISel(tm, OptLevel), TM(tm) {
// doMulWide is enabled only for optimization levels above 0.
55 doMulWide = (OptLevel > 0);
/// Records the subtarget of \p MF (cast to NVPTXSubtarget) in the Subtarget
/// member, then delegates to SelectionDAGISel::runOnMachineFunction and
/// returns its result.
58 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
59 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
60 return SelectionDAGISel::runOnMachineFunction(MF);
/// Returns the f32 division precision level. An explicit occurrence of the
/// UsePrecDivF32 option on the command line takes priority; otherwise the
/// choice depends on TM.Options.UnsafeFPMath.
/// NOTE(review): the return statements are elided from this listing, so the
/// concrete values returned on each path are not visible here.
63 int NVPTXDAGToDAGISel::getDivF32Level() const {
64 if (UsePrecDivF32.getNumOccurrences() > 0) {
65 // If nvptx-prec-divf32=N is used on the command-line, always honor it
68 // Otherwise, use div.approx if fast math is enabled
69 if (TM.Options.UnsafeFPMath)
/// Returns whether the precise f32 square root should be used: the value of
/// the UsePrecSqrtF32 option when it was given on the command line, otherwise
/// the negation of TM.Options.UnsafeFPMath.
76 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
77 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
78 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
79 return UsePrecSqrtF32;
81 // Otherwise, use sqrt.approx if fast math is enabled
82 return !TM.Options.UnsafeFPMath;
/// Returns whether f32 flush-to-zero should be used. An explicit occurrence
/// of the FtzEnabled option on the command line takes priority; otherwise a
/// "nvptx-f32ftz" attribute on the current function decides, and is treated
/// as enabled only when its string value is exactly "true".
/// NOTE(review): the command-line return and the fallback used when the
/// attribute is absent are elided from this listing.
86 bool NVPTXDAGToDAGISel::useF32FTZ() const {
87 if (FtzEnabled.getNumOccurrences() > 0) {
88 // If nvptx-f32ftz is used on the command-line, always honor it
91 const Function *F = MF->getFunction();
92 // Otherwise, check for an nvptx-f32ftz attribute on the function
93 if (F->hasFnAttribute("nvptx-f32ftz"))
94 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
/// Returns the answer of the subtarget's NVPTXTargetLowering::allowFMA for
/// the current machine function (*MF) and OptLevel.
100 bool NVPTXDAGToDAGISel::allowFMA() const {
101 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
102 return TL->allowFMA(*MF, OptLevel);
105 /// Select - Select instructions not customized! Used for
106 /// expanded, promoted and normal instructions.
///
/// Switches on N->getOpcode() and hands the node to the matching Select*
/// helper, storing the helper's result in ResNode. Nodes that already carry a
/// machine opcode return nullptr. The last visible statement falls back to
/// SelectCode(N).
/// NOTE(review): the embedded line numbers skip, so `break`s, the `default`
/// label, some `case` labels (e.g. those guarding SelectLoad, SelectStore and
/// SelectBFE) and the code that returns ResNode are not visible here.
107 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
109 if (N->isMachineOpcode()) {
111 return nullptr; // Already selected.
114 SDNode *ResNode = nullptr;
115 switch (N->getOpcode()) {
// Scalar load / store (case labels elided in this listing).
117 ResNode = SelectLoad(N);
120 ResNode = SelectStore(N);
// Vector loads.
122 case NVPTXISD::LoadV2:
123 case NVPTXISD::LoadV4:
124 ResNode = SelectLoadVector(N);
// LDG / LDU vector nodes.
126 case NVPTXISD::LDGV2:
127 case NVPTXISD::LDGV4:
128 case NVPTXISD::LDUV2:
129 case NVPTXISD::LDUV4:
130 ResNode = SelectLDGLDU(N);
// Vector stores.
132 case NVPTXISD::StoreV2:
133 case NVPTXISD::StoreV4:
134 ResNode = SelectStoreVector(N);
// Parameter loads.
136 case NVPTXISD::LoadParam:
137 case NVPTXISD::LoadParamV2:
138 case NVPTXISD::LoadParamV4:
139 ResNode = SelectLoadParam(N);
// Return-value stores.
141 case NVPTXISD::StoreRetval:
142 case NVPTXISD::StoreRetvalV2:
143 case NVPTXISD::StoreRetvalV4:
144 ResNode = SelectStoreRetval(N);
// Parameter stores.
146 case NVPTXISD::StoreParam:
147 case NVPTXISD::StoreParamV2:
148 case NVPTXISD::StoreParamV4:
149 case NVPTXISD::StoreParamS32:
150 case NVPTXISD::StoreParamU32:
151 ResNode = SelectStoreParam(N);
// Intrinsics without and with a chain operand.
153 case ISD::INTRINSIC_WO_CHAIN:
154 ResNode = SelectIntrinsicNoChain(N);
156 case ISD::INTRINSIC_W_CHAIN:
157 ResNode = SelectIntrinsicChain(N);
// All Tex*, Tld4*, TexUnified* and Tld4Unified* opcodes share one handler.
159 case NVPTXISD::Tex1DFloatS32:
160 case NVPTXISD::Tex1DFloatFloat:
161 case NVPTXISD::Tex1DFloatFloatLevel:
162 case NVPTXISD::Tex1DFloatFloatGrad:
163 case NVPTXISD::Tex1DS32S32:
164 case NVPTXISD::Tex1DS32Float:
165 case NVPTXISD::Tex1DS32FloatLevel:
166 case NVPTXISD::Tex1DS32FloatGrad:
167 case NVPTXISD::Tex1DU32S32:
168 case NVPTXISD::Tex1DU32Float:
169 case NVPTXISD::Tex1DU32FloatLevel:
170 case NVPTXISD::Tex1DU32FloatGrad:
171 case NVPTXISD::Tex1DArrayFloatS32:
172 case NVPTXISD::Tex1DArrayFloatFloat:
173 case NVPTXISD::Tex1DArrayFloatFloatLevel:
174 case NVPTXISD::Tex1DArrayFloatFloatGrad:
175 case NVPTXISD::Tex1DArrayS32S32:
176 case NVPTXISD::Tex1DArrayS32Float:
177 case NVPTXISD::Tex1DArrayS32FloatLevel:
178 case NVPTXISD::Tex1DArrayS32FloatGrad:
179 case NVPTXISD::Tex1DArrayU32S32:
180 case NVPTXISD::Tex1DArrayU32Float:
181 case NVPTXISD::Tex1DArrayU32FloatLevel:
182 case NVPTXISD::Tex1DArrayU32FloatGrad:
183 case NVPTXISD::Tex2DFloatS32:
184 case NVPTXISD::Tex2DFloatFloat:
185 case NVPTXISD::Tex2DFloatFloatLevel:
186 case NVPTXISD::Tex2DFloatFloatGrad:
187 case NVPTXISD::Tex2DS32S32:
188 case NVPTXISD::Tex2DS32Float:
189 case NVPTXISD::Tex2DS32FloatLevel:
190 case NVPTXISD::Tex2DS32FloatGrad:
191 case NVPTXISD::Tex2DU32S32:
192 case NVPTXISD::Tex2DU32Float:
193 case NVPTXISD::Tex2DU32FloatLevel:
194 case NVPTXISD::Tex2DU32FloatGrad:
195 case NVPTXISD::Tex2DArrayFloatS32:
196 case NVPTXISD::Tex2DArrayFloatFloat:
197 case NVPTXISD::Tex2DArrayFloatFloatLevel:
198 case NVPTXISD::Tex2DArrayFloatFloatGrad:
199 case NVPTXISD::Tex2DArrayS32S32:
200 case NVPTXISD::Tex2DArrayS32Float:
201 case NVPTXISD::Tex2DArrayS32FloatLevel:
202 case NVPTXISD::Tex2DArrayS32FloatGrad:
203 case NVPTXISD::Tex2DArrayU32S32:
204 case NVPTXISD::Tex2DArrayU32Float:
205 case NVPTXISD::Tex2DArrayU32FloatLevel:
206 case NVPTXISD::Tex2DArrayU32FloatGrad:
207 case NVPTXISD::Tex3DFloatS32:
208 case NVPTXISD::Tex3DFloatFloat:
209 case NVPTXISD::Tex3DFloatFloatLevel:
210 case NVPTXISD::Tex3DFloatFloatGrad:
211 case NVPTXISD::Tex3DS32S32:
212 case NVPTXISD::Tex3DS32Float:
213 case NVPTXISD::Tex3DS32FloatLevel:
214 case NVPTXISD::Tex3DS32FloatGrad:
215 case NVPTXISD::Tex3DU32S32:
216 case NVPTXISD::Tex3DU32Float:
217 case NVPTXISD::Tex3DU32FloatLevel:
218 case NVPTXISD::Tex3DU32FloatGrad:
219 case NVPTXISD::TexCubeFloatFloat:
220 case NVPTXISD::TexCubeFloatFloatLevel:
221 case NVPTXISD::TexCubeS32Float:
222 case NVPTXISD::TexCubeS32FloatLevel:
223 case NVPTXISD::TexCubeU32Float:
224 case NVPTXISD::TexCubeU32FloatLevel:
225 case NVPTXISD::TexCubeArrayFloatFloat:
226 case NVPTXISD::TexCubeArrayFloatFloatLevel:
227 case NVPTXISD::TexCubeArrayS32Float:
228 case NVPTXISD::TexCubeArrayS32FloatLevel:
229 case NVPTXISD::TexCubeArrayU32Float:
230 case NVPTXISD::TexCubeArrayU32FloatLevel:
231 case NVPTXISD::Tld4R2DFloatFloat:
232 case NVPTXISD::Tld4G2DFloatFloat:
233 case NVPTXISD::Tld4B2DFloatFloat:
234 case NVPTXISD::Tld4A2DFloatFloat:
235 case NVPTXISD::Tld4R2DS64Float:
236 case NVPTXISD::Tld4G2DS64Float:
237 case NVPTXISD::Tld4B2DS64Float:
238 case NVPTXISD::Tld4A2DS64Float:
239 case NVPTXISD::Tld4R2DU64Float:
240 case NVPTXISD::Tld4G2DU64Float:
241 case NVPTXISD::Tld4B2DU64Float:
242 case NVPTXISD::Tld4A2DU64Float:
243 case NVPTXISD::TexUnified1DFloatS32:
244 case NVPTXISD::TexUnified1DFloatFloat:
245 case NVPTXISD::TexUnified1DFloatFloatLevel:
246 case NVPTXISD::TexUnified1DFloatFloatGrad:
247 case NVPTXISD::TexUnified1DS32S32:
248 case NVPTXISD::TexUnified1DS32Float:
249 case NVPTXISD::TexUnified1DS32FloatLevel:
250 case NVPTXISD::TexUnified1DS32FloatGrad:
251 case NVPTXISD::TexUnified1DU32S32:
252 case NVPTXISD::TexUnified1DU32Float:
253 case NVPTXISD::TexUnified1DU32FloatLevel:
254 case NVPTXISD::TexUnified1DU32FloatGrad:
255 case NVPTXISD::TexUnified1DArrayFloatS32:
256 case NVPTXISD::TexUnified1DArrayFloatFloat:
257 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
258 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
259 case NVPTXISD::TexUnified1DArrayS32S32:
260 case NVPTXISD::TexUnified1DArrayS32Float:
261 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
262 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
263 case NVPTXISD::TexUnified1DArrayU32S32:
264 case NVPTXISD::TexUnified1DArrayU32Float:
265 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
266 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
267 case NVPTXISD::TexUnified2DFloatS32:
268 case NVPTXISD::TexUnified2DFloatFloat:
269 case NVPTXISD::TexUnified2DFloatFloatLevel:
270 case NVPTXISD::TexUnified2DFloatFloatGrad:
271 case NVPTXISD::TexUnified2DS32S32:
272 case NVPTXISD::TexUnified2DS32Float:
273 case NVPTXISD::TexUnified2DS32FloatLevel:
274 case NVPTXISD::TexUnified2DS32FloatGrad:
275 case NVPTXISD::TexUnified2DU32S32:
276 case NVPTXISD::TexUnified2DU32Float:
277 case NVPTXISD::TexUnified2DU32FloatLevel:
278 case NVPTXISD::TexUnified2DU32FloatGrad:
279 case NVPTXISD::TexUnified2DArrayFloatS32:
280 case NVPTXISD::TexUnified2DArrayFloatFloat:
281 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
282 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
283 case NVPTXISD::TexUnified2DArrayS32S32:
284 case NVPTXISD::TexUnified2DArrayS32Float:
285 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
286 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
287 case NVPTXISD::TexUnified2DArrayU32S32:
288 case NVPTXISD::TexUnified2DArrayU32Float:
289 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
290 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
291 case NVPTXISD::TexUnified3DFloatS32:
292 case NVPTXISD::TexUnified3DFloatFloat:
293 case NVPTXISD::TexUnified3DFloatFloatLevel:
294 case NVPTXISD::TexUnified3DFloatFloatGrad:
295 case NVPTXISD::TexUnified3DS32S32:
296 case NVPTXISD::TexUnified3DS32Float:
297 case NVPTXISD::TexUnified3DS32FloatLevel:
298 case NVPTXISD::TexUnified3DS32FloatGrad:
299 case NVPTXISD::TexUnified3DU32S32:
300 case NVPTXISD::TexUnified3DU32Float:
301 case NVPTXISD::TexUnified3DU32FloatLevel:
302 case NVPTXISD::TexUnified3DU32FloatGrad:
303 case NVPTXISD::TexUnifiedCubeFloatFloat:
304 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
305 case NVPTXISD::TexUnifiedCubeS32Float:
306 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
307 case NVPTXISD::TexUnifiedCubeU32Float:
308 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
310 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
311 case NVPTXISD::TexUnifiedCubeArrayS32Float:
312 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
313 case NVPTXISD::TexUnifiedCubeArrayU32Float:
314 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
315 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
316 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
319 case NVPTXISD::Tld4UnifiedR2DS64Float:
320 case NVPTXISD::Tld4UnifiedG2DS64Float:
321 case NVPTXISD::Tld4UnifiedB2DS64Float:
322 case NVPTXISD::Tld4UnifiedA2DS64Float:
323 case NVPTXISD::Tld4UnifiedR2DU64Float:
324 case NVPTXISD::Tld4UnifiedG2DU64Float:
325 case NVPTXISD::Tld4UnifiedB2DU64Float:
326 case NVPTXISD::Tld4UnifiedA2DU64Float:
327 ResNode = SelectTextureIntrinsic(N);
// All Suld* opcodes (Clamp, Trap and Zero variants) share one handler.
329 case NVPTXISD::Suld1DI8Clamp:
330 case NVPTXISD::Suld1DI16Clamp:
331 case NVPTXISD::Suld1DI32Clamp:
332 case NVPTXISD::Suld1DI64Clamp:
333 case NVPTXISD::Suld1DV2I8Clamp:
334 case NVPTXISD::Suld1DV2I16Clamp:
335 case NVPTXISD::Suld1DV2I32Clamp:
336 case NVPTXISD::Suld1DV2I64Clamp:
337 case NVPTXISD::Suld1DV4I8Clamp:
338 case NVPTXISD::Suld1DV4I16Clamp:
339 case NVPTXISD::Suld1DV4I32Clamp:
340 case NVPTXISD::Suld1DArrayI8Clamp:
341 case NVPTXISD::Suld1DArrayI16Clamp:
342 case NVPTXISD::Suld1DArrayI32Clamp:
343 case NVPTXISD::Suld1DArrayI64Clamp:
344 case NVPTXISD::Suld1DArrayV2I8Clamp:
345 case NVPTXISD::Suld1DArrayV2I16Clamp:
346 case NVPTXISD::Suld1DArrayV2I32Clamp:
347 case NVPTXISD::Suld1DArrayV2I64Clamp:
348 case NVPTXISD::Suld1DArrayV4I8Clamp:
349 case NVPTXISD::Suld1DArrayV4I16Clamp:
350 case NVPTXISD::Suld1DArrayV4I32Clamp:
351 case NVPTXISD::Suld2DI8Clamp:
352 case NVPTXISD::Suld2DI16Clamp:
353 case NVPTXISD::Suld2DI32Clamp:
354 case NVPTXISD::Suld2DI64Clamp:
355 case NVPTXISD::Suld2DV2I8Clamp:
356 case NVPTXISD::Suld2DV2I16Clamp:
357 case NVPTXISD::Suld2DV2I32Clamp:
358 case NVPTXISD::Suld2DV2I64Clamp:
359 case NVPTXISD::Suld2DV4I8Clamp:
360 case NVPTXISD::Suld2DV4I16Clamp:
361 case NVPTXISD::Suld2DV4I32Clamp:
362 case NVPTXISD::Suld2DArrayI8Clamp:
363 case NVPTXISD::Suld2DArrayI16Clamp:
364 case NVPTXISD::Suld2DArrayI32Clamp:
365 case NVPTXISD::Suld2DArrayI64Clamp:
366 case NVPTXISD::Suld2DArrayV2I8Clamp:
367 case NVPTXISD::Suld2DArrayV2I16Clamp:
368 case NVPTXISD::Suld2DArrayV2I32Clamp:
369 case NVPTXISD::Suld2DArrayV2I64Clamp:
370 case NVPTXISD::Suld2DArrayV4I8Clamp:
371 case NVPTXISD::Suld2DArrayV4I16Clamp:
372 case NVPTXISD::Suld2DArrayV4I32Clamp:
373 case NVPTXISD::Suld3DI8Clamp:
374 case NVPTXISD::Suld3DI16Clamp:
375 case NVPTXISD::Suld3DI32Clamp:
376 case NVPTXISD::Suld3DI64Clamp:
377 case NVPTXISD::Suld3DV2I8Clamp:
378 case NVPTXISD::Suld3DV2I16Clamp:
379 case NVPTXISD::Suld3DV2I32Clamp:
380 case NVPTXISD::Suld3DV2I64Clamp:
381 case NVPTXISD::Suld3DV4I8Clamp:
382 case NVPTXISD::Suld3DV4I16Clamp:
383 case NVPTXISD::Suld3DV4I32Clamp:
384 case NVPTXISD::Suld1DI8Trap:
385 case NVPTXISD::Suld1DI16Trap:
386 case NVPTXISD::Suld1DI32Trap:
387 case NVPTXISD::Suld1DI64Trap:
388 case NVPTXISD::Suld1DV2I8Trap:
389 case NVPTXISD::Suld1DV2I16Trap:
390 case NVPTXISD::Suld1DV2I32Trap:
391 case NVPTXISD::Suld1DV2I64Trap:
392 case NVPTXISD::Suld1DV4I8Trap:
393 case NVPTXISD::Suld1DV4I16Trap:
394 case NVPTXISD::Suld1DV4I32Trap:
395 case NVPTXISD::Suld1DArrayI8Trap:
396 case NVPTXISD::Suld1DArrayI16Trap:
397 case NVPTXISD::Suld1DArrayI32Trap:
398 case NVPTXISD::Suld1DArrayI64Trap:
399 case NVPTXISD::Suld1DArrayV2I8Trap:
400 case NVPTXISD::Suld1DArrayV2I16Trap:
401 case NVPTXISD::Suld1DArrayV2I32Trap:
402 case NVPTXISD::Suld1DArrayV2I64Trap:
403 case NVPTXISD::Suld1DArrayV4I8Trap:
404 case NVPTXISD::Suld1DArrayV4I16Trap:
405 case NVPTXISD::Suld1DArrayV4I32Trap:
406 case NVPTXISD::Suld2DI8Trap:
407 case NVPTXISD::Suld2DI16Trap:
408 case NVPTXISD::Suld2DI32Trap:
409 case NVPTXISD::Suld2DI64Trap:
410 case NVPTXISD::Suld2DV2I8Trap:
411 case NVPTXISD::Suld2DV2I16Trap:
412 case NVPTXISD::Suld2DV2I32Trap:
413 case NVPTXISD::Suld2DV2I64Trap:
414 case NVPTXISD::Suld2DV4I8Trap:
415 case NVPTXISD::Suld2DV4I16Trap:
416 case NVPTXISD::Suld2DV4I32Trap:
417 case NVPTXISD::Suld2DArrayI8Trap:
418 case NVPTXISD::Suld2DArrayI16Trap:
419 case NVPTXISD::Suld2DArrayI32Trap:
420 case NVPTXISD::Suld2DArrayI64Trap:
421 case NVPTXISD::Suld2DArrayV2I8Trap:
422 case NVPTXISD::Suld2DArrayV2I16Trap:
423 case NVPTXISD::Suld2DArrayV2I32Trap:
424 case NVPTXISD::Suld2DArrayV2I64Trap:
425 case NVPTXISD::Suld2DArrayV4I8Trap:
426 case NVPTXISD::Suld2DArrayV4I16Trap:
427 case NVPTXISD::Suld2DArrayV4I32Trap:
428 case NVPTXISD::Suld3DI8Trap:
429 case NVPTXISD::Suld3DI16Trap:
430 case NVPTXISD::Suld3DI32Trap:
431 case NVPTXISD::Suld3DI64Trap:
432 case NVPTXISD::Suld3DV2I8Trap:
433 case NVPTXISD::Suld3DV2I16Trap:
434 case NVPTXISD::Suld3DV2I32Trap:
435 case NVPTXISD::Suld3DV2I64Trap:
436 case NVPTXISD::Suld3DV4I8Trap:
437 case NVPTXISD::Suld3DV4I16Trap:
438 case NVPTXISD::Suld3DV4I32Trap:
439 case NVPTXISD::Suld1DI8Zero:
440 case NVPTXISD::Suld1DI16Zero:
441 case NVPTXISD::Suld1DI32Zero:
442 case NVPTXISD::Suld1DI64Zero:
443 case NVPTXISD::Suld1DV2I8Zero:
444 case NVPTXISD::Suld1DV2I16Zero:
445 case NVPTXISD::Suld1DV2I32Zero:
446 case NVPTXISD::Suld1DV2I64Zero:
447 case NVPTXISD::Suld1DV4I8Zero:
448 case NVPTXISD::Suld1DV4I16Zero:
449 case NVPTXISD::Suld1DV4I32Zero:
450 case NVPTXISD::Suld1DArrayI8Zero:
451 case NVPTXISD::Suld1DArrayI16Zero:
452 case NVPTXISD::Suld1DArrayI32Zero:
453 case NVPTXISD::Suld1DArrayI64Zero:
454 case NVPTXISD::Suld1DArrayV2I8Zero:
455 case NVPTXISD::Suld1DArrayV2I16Zero:
456 case NVPTXISD::Suld1DArrayV2I32Zero:
457 case NVPTXISD::Suld1DArrayV2I64Zero:
458 case NVPTXISD::Suld1DArrayV4I8Zero:
459 case NVPTXISD::Suld1DArrayV4I16Zero:
460 case NVPTXISD::Suld1DArrayV4I32Zero:
461 case NVPTXISD::Suld2DI8Zero:
462 case NVPTXISD::Suld2DI16Zero:
463 case NVPTXISD::Suld2DI32Zero:
464 case NVPTXISD::Suld2DI64Zero:
465 case NVPTXISD::Suld2DV2I8Zero:
466 case NVPTXISD::Suld2DV2I16Zero:
467 case NVPTXISD::Suld2DV2I32Zero:
468 case NVPTXISD::Suld2DV2I64Zero:
469 case NVPTXISD::Suld2DV4I8Zero:
470 case NVPTXISD::Suld2DV4I16Zero:
471 case NVPTXISD::Suld2DV4I32Zero:
472 case NVPTXISD::Suld2DArrayI8Zero:
473 case NVPTXISD::Suld2DArrayI16Zero:
474 case NVPTXISD::Suld2DArrayI32Zero:
475 case NVPTXISD::Suld2DArrayI64Zero:
476 case NVPTXISD::Suld2DArrayV2I8Zero:
477 case NVPTXISD::Suld2DArrayV2I16Zero:
478 case NVPTXISD::Suld2DArrayV2I32Zero:
479 case NVPTXISD::Suld2DArrayV2I64Zero:
480 case NVPTXISD::Suld2DArrayV4I8Zero:
481 case NVPTXISD::Suld2DArrayV4I16Zero:
482 case NVPTXISD::Suld2DArrayV4I32Zero:
483 case NVPTXISD::Suld3DI8Zero:
484 case NVPTXISD::Suld3DI16Zero:
485 case NVPTXISD::Suld3DI32Zero:
486 case NVPTXISD::Suld3DI64Zero:
487 case NVPTXISD::Suld3DV2I8Zero:
488 case NVPTXISD::Suld3DV2I16Zero:
489 case NVPTXISD::Suld3DV2I32Zero:
490 case NVPTXISD::Suld3DV2I64Zero:
491 case NVPTXISD::Suld3DV4I8Zero:
492 case NVPTXISD::Suld3DV4I16Zero:
493 case NVPTXISD::Suld3DV4I32Zero:
494 ResNode = SelectSurfaceIntrinsic(N);
// Bit-field extract handler (its case labels are elided in this listing).
500 ResNode = SelectBFE(N);
// Address-space casts.
502 case ISD::ADDRSPACECAST:
503 ResNode = SelectAddrSpaceCast(N);
// Fallback: SelectCode(N).
510 return SelectCode(N);
/// Handles INTRINSIC_W_CHAIN nodes. The intrinsic ID is read from operand 1
/// as a ConstantSDNode; the nvvm ldg/ldu global intrinsics listed below are
/// forwarded to SelectLDGLDU.
/// NOTE(review): the switch header and its default path are elided from this
/// listing, so the result for any other intrinsic is not visible here.
513 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
514 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
518 case Intrinsic::nvvm_ldg_global_f:
519 case Intrinsic::nvvm_ldg_global_i:
520 case Intrinsic::nvvm_ldg_global_p:
521 case Intrinsic::nvvm_ldu_global_f:
522 case Intrinsic::nvvm_ldu_global_i:
523 case Intrinsic::nvvm_ldu_global_p:
524 return SelectLDGLDU(N);
/// Maps the address space of the IR value behind \p N's memory operand to the
/// matching NVPTX::PTXLdStInstCode address-space code. The value's type must
/// be a PointerType for the mapping to apply; the final return yields GENERIC.
/// NOTE(review): the guard in front of the first GENERIC return is elided
/// from this listing; presumably it covers a null Src -- confirm.
528 static unsigned int getCodeAddrSpace(MemSDNode *N) {
529 const Value *Src = N->getMemOperand()->getValue();
532 return NVPTX::PTXLdStInstCode::GENERIC;
534 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
535 switch (PT->getAddressSpace()) {
536 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
537 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
538 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
539 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
540 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
541 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fall-through result when no case above returned.
545 return NVPTX::PTXLdStInstCode::GENERIC;
/// Decides whether the memory access \p N may be selected as an LDG load.
/// The first check tests that the subtarget reports hasLDG() and that
/// \p codeAddrSpace is GLOBAL. After that, canUseLDG is set only when the
/// underlying object of the memory operand (per GetUnderlyingObject with
/// \p DL) is a function Argument that only reads memory and is noalias.
/// NOTE(review): the body of the first check and the final return are elided
/// from this listing; presumably they return false and canUseLDG -- confirm.
548 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
549 unsigned codeAddrSpace, const DataLayout &DL) {
550 if (!Subtarget.hasLDG() || codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) {
554 // Check whether load operates on a readonly argument.
555 bool canUseLDG = false;
556 if (const Argument *A = dyn_cast<const Argument>(
557 GetUnderlyingObject(N->getMemOperand()->getValue(), DL)))
558 canUseLDG = A->onlyReadsMemory() && A->hasNoAliasAttr();
/// Handles INTRINSIC_WO_CHAIN nodes. The intrinsic ID is read from operand 0
/// as a ConstantSDNode; nvvm_texsurf_handle_internal is forwarded to
/// SelectTexSurfHandle.
/// NOTE(review): the switch header and its default path are elided from this
/// listing, so the result for any other intrinsic is not visible here.
563 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
564 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
568 case Intrinsic::nvvm_texsurf_handle_internal:
569 return SelectTexSurfHandle(N);
/// Builds an NVPTX::texsurf_handles machine node with an i64 result for the
/// texture/surface handle intrinsic \p N.
/// NOTE(review): the remaining arguments of the getMachineNode call are
/// elided from this listing; GlobalVal is presumably among them -- confirm.
573 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
574 // Op 0 is the intrinsic ID
// Operand 1 is taken as a wrapper node; its operand 0 is the wrapped value.
575 SDValue Wrapper = N->getOperand(1);
576 SDValue GlobalVal = Wrapper.getOperand(0);
577 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
/// Selects a machine node for an ADDRSPACECAST. Casts into the generic space
/// use the cvta_<space>_yes opcodes, casts out of the generic space use the
/// cvta_to_<space>_yes opcodes (nvvm_ptr_gen_to_param for PARAM); the _64
/// variant is picked when TM.is64Bit(). An address space with no matching
/// case, or a cast between two non-generic spaces, ends in
/// report_fatal_error.
/// NOTE(review): the declaration of Opc, the `break`s and the `else` that
/// separates the two directions are elided from this listing.
581 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
582 SDValue Src = N->getOperand(0);
583 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
584 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
585 unsigned DstAddrSpace = CastN->getDestAddressSpace();
587 assert(SrcAddrSpace != DstAddrSpace &&
588 "addrspacecast must be between different address spaces");
590 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
591 // Specific to generic
593 switch (SrcAddrSpace) {
594 default: report_fatal_error("Bad address space in addrspacecast");
595 case ADDRESS_SPACE_GLOBAL:
596 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
598 case ADDRESS_SPACE_SHARED:
599 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
601 case ADDRESS_SPACE_CONST:
602 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
604 case ADDRESS_SPACE_LOCAL:
605 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
// The new node takes Src as its only operand and keeps N's result type.
608 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
610 // Generic to specific
// NOTE(review): compares against the literal 0; presumably this equals
// ADDRESS_SPACE_GENERIC -- confirm.
611 if (SrcAddrSpace != 0)
612 report_fatal_error("Cannot cast between two non-generic address spaces");
614 switch (DstAddrSpace) {
615 default: report_fatal_error("Bad address space in addrspacecast");
616 case ADDRESS_SPACE_GLOBAL:
617 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
618 : NVPTX::cvta_to_global_yes;
620 case ADDRESS_SPACE_SHARED:
621 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
622 : NVPTX::cvta_to_shared_yes;
624 case ADDRESS_SPACE_CONST:
626 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
628 case ADDRESS_SPACE_LOCAL:
630 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
632 case ADDRESS_SPACE_PARAM:
633 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
634 : NVPTX::nvvm_ptr_gen_to_param;
637 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
/// Selects a machine load for the LoadSDNode \p N.
///
/// Loads that canLowerToLDG accepts are handed to SelectLDGLDU. Otherwise an
/// LD_<type>_<form> opcode is chosen, where <form> follows the address
/// pattern that matches operand 1:
///   _avar  when SelectDirectAddr matches,
///   _asi   when SelectADDRsi / SelectADDRsi64 matches,
///   _ari   when SelectADDRri / SelectADDRri64 matches,
///   _areg  for the last visible form, which passes operand 1 (N1) itself.
/// Every form builds its operand list as: volatile flag, address-space code,
/// vector type, from-type, from-type width, address operand(s), chain. The
/// memory operand of \p N is finally attached to the new machine node.
/// NOTE(review): the embedded line numbers skip, so the switch headers and
/// case labels that pick the opcode per value type, the early returns, the
/// conditions selecting the _64 opcodes, and the final return are not visible
/// in this listing.
641 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
643 LoadSDNode *LD = cast<LoadSDNode>(N);
644 EVT LoadedVT = LD->getMemoryVT();
645 SDNode *NVPTXLD = nullptr;
647 // do not support pre/post inc/dec
651 if (!LoadedVT.isSimple())
654 // Address Space Setting
655 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
657 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, CurDAG->getDataLayout())) {
658 return SelectLDGLDU(N);
662 // - .volatile is only available for .global and .shared
// NOTE(review): the visible condition also tests GENERIC, which the comment
// above does not mention; the statement it guards is elided here.
663 bool isVolatile = LD->isVolatile();
664 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
665 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
666 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector type code: starts as Scalar, with V2 / V4 assigned for vector types.
670 MVT SimpleVT = LoadedVT.getSimpleVT();
671 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
672 if (SimpleVT.isVector()) {
673 unsigned num = SimpleVT.getVectorNumElements();
675 vecType = NVPTX::PTXLdStInstCode::V2;
677 vecType = NVPTX::PTXLdStInstCode::V4;
682 // Type Setting: fromType + fromTypeWidth
684 // Sign : ISD::SEXTLOAD
685 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
687 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
688 MVT ScalarVT = SimpleVT.getScalarType();
689 // Read at least 8 bits (predicates are stored as 8-bit values)
690 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
691 unsigned int fromType;
692 if ((LD->getExtensionType() == ISD::SEXTLOAD))
693 fromType = NVPTX::PTXLdStInstCode::Signed;
694 else if (ScalarVT.isFloatingPoint())
695 fromType = NVPTX::PTXLdStInstCode::Float;
697 fromType = NVPTX::PTXLdStInstCode::Unsigned;
699 // Create the machine instruction DAG
700 SDValue Chain = N->getOperand(0);
701 SDValue N1 = N->getOperand(1);
703 SDValue Offset, Base;
705 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address (_avar opcodes).
707 if (SelectDirectAddr(N1, Addr)) {
710 Opcode = NVPTX::LD_i8_avar;
713 Opcode = NVPTX::LD_i16_avar;
716 Opcode = NVPTX::LD_i32_avar;
719 Opcode = NVPTX::LD_i64_avar;
722 Opcode = NVPTX::LD_f32_avar;
725 Opcode = NVPTX::LD_f64_avar;
730 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
731 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
732 getI32Imm(fromTypeWidth, dl), Addr, Chain };
733 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: SelectADDRsi / SelectADDRsi64 match (_asi opcodes).
734 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
735 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
738 Opcode = NVPTX::LD_i8_asi;
741 Opcode = NVPTX::LD_i16_asi;
744 Opcode = NVPTX::LD_i32_asi;
747 Opcode = NVPTX::LD_i64_asi;
750 Opcode = NVPTX::LD_f32_asi;
753 Opcode = NVPTX::LD_f64_asi;
758 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
759 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
760 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
761 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: SelectADDRri / SelectADDRri64 match (_ari and _ari_64 opcodes).
762 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
763 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
767 Opcode = NVPTX::LD_i8_ari_64;
770 Opcode = NVPTX::LD_i16_ari_64;
773 Opcode = NVPTX::LD_i32_ari_64;
776 Opcode = NVPTX::LD_i64_ari_64;
779 Opcode = NVPTX::LD_f32_ari_64;
782 Opcode = NVPTX::LD_f64_ari_64;
790 Opcode = NVPTX::LD_i8_ari;
793 Opcode = NVPTX::LD_i16_ari;
796 Opcode = NVPTX::LD_i32_ari;
799 Opcode = NVPTX::LD_i64_ari;
802 Opcode = NVPTX::LD_f32_ari;
805 Opcode = NVPTX::LD_f64_ari;
811 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
812 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
813 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
814 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4: _areg and _areg_64 opcodes; the address operand is N1 itself.
819 Opcode = NVPTX::LD_i8_areg_64;
822 Opcode = NVPTX::LD_i16_areg_64;
825 Opcode = NVPTX::LD_i32_areg_64;
828 Opcode = NVPTX::LD_i64_areg_64;
831 Opcode = NVPTX::LD_f32_areg_64;
834 Opcode = NVPTX::LD_f64_areg_64;
842 Opcode = NVPTX::LD_i8_areg;
845 Opcode = NVPTX::LD_i16_areg;
848 Opcode = NVPTX::LD_i32_areg;
851 Opcode = NVPTX::LD_i64_areg;
854 Opcode = NVPTX::LD_f32_areg;
857 Opcode = NVPTX::LD_f64_areg;
863 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
864 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
865 getI32Imm(fromTypeWidth, dl), N1, Chain };
866 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Copy N's memory operand onto the new machine node.
870 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
871 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
872 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Selects a machine load for an NVPTXISD::LoadV2 / LoadV4 node \p N.
///
/// Loads that canLowerToLDG accepts are handed to SelectLDGLDU. Otherwise an
/// LDV_<type>_<v2|v4>_<form> opcode is chosen from the node opcode, the
/// simple type of the node's first result, and the address pattern that
/// matches operand 1:
///   _avar  when SelectDirectAddr matches,
///   _asi   when SelectADDRsi / SelectADDRsi64 matches,
///   _ari   when SelectADDRri / SelectADDRri64 matches,
///   _areg  for the last visible form, which passes operand 1 (Op1) itself.
/// Every form builds its operand list as: volatile flag, address-space code,
/// vector type, from-type, from-type width, address operand(s), chain, and
/// creates the node with N's own VT list. The memory operand of \p N is
/// finally attached to the new machine node.
/// NOTE(review): the embedded line numbers skip, so the case labels that pick
/// the opcode per element type, the early returns, the conditions selecting
/// the _64 opcodes, several local declarations, and the final return are not
/// visible in this listing.
878 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
880 SDValue Chain = N->getOperand(0);
881 SDValue Op1 = N->getOperand(1);
882 SDValue Addr, Offset, Base;
886 MemSDNode *MemSD = cast<MemSDNode>(N);
887 EVT LoadedVT = MemSD->getMemoryVT();
889 if (!LoadedVT.isSimple())
892 // Address Space Setting
893 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
895 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, CurDAG->getDataLayout())) {
896 return SelectLDGLDU(N);
900 // - .volatile is only available for .global and .shared
// NOTE(review): the visible condition also tests GENERIC, which the comment
// above does not mention; the statement it guards is elided here.
901 bool IsVolatile = MemSD->isVolatile();
902 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
903 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
904 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
908 MVT SimpleVT = LoadedVT.getSimpleVT();
910 // Type Setting: fromType + fromTypeWidth
912 // Sign : ISD::SEXTLOAD
913 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
915 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
916 MVT ScalarVT = SimpleVT.getScalarType();
917 // Read at least 8 bits (predicates are stored as 8-bit values)
918 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
919 unsigned int FromType;
920 // The last operand holds the original LoadSDNode::getExtensionType() value
921 unsigned ExtensionType = cast<ConstantSDNode>(
922 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
923 if (ExtensionType == ISD::SEXTLOAD)
924 FromType = NVPTX::PTXLdStInstCode::Signed;
925 else if (ScalarVT.isFloatingPoint())
926 FromType = NVPTX::PTXLdStInstCode::Float;
928 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector type code follows the node opcode: LoadV2 -> V2, LoadV4 -> V4.
932 switch (N->getOpcode()) {
933 case NVPTXISD::LoadV2:
934 VecType = NVPTX::PTXLdStInstCode::V2;
936 case NVPTXISD::LoadV4:
937 VecType = NVPTX::PTXLdStInstCode::V4;
// The opcode tables below switch on the simple type of N's first result.
943 EVT EltVT = N->getValueType(0);
// Form 1: direct address (_avar opcodes).
945 if (SelectDirectAddr(Op1, Addr)) {
946 switch (N->getOpcode()) {
949 case NVPTXISD::LoadV2:
950 switch (EltVT.getSimpleVT().SimpleTy) {
954 Opcode = NVPTX::LDV_i8_v2_avar;
957 Opcode = NVPTX::LDV_i16_v2_avar;
960 Opcode = NVPTX::LDV_i32_v2_avar;
963 Opcode = NVPTX::LDV_i64_v2_avar;
966 Opcode = NVPTX::LDV_f32_v2_avar;
969 Opcode = NVPTX::LDV_f64_v2_avar;
973 case NVPTXISD::LoadV4:
974 switch (EltVT.getSimpleVT().SimpleTy) {
978 Opcode = NVPTX::LDV_i8_v4_avar;
981 Opcode = NVPTX::LDV_i16_v4_avar;
984 Opcode = NVPTX::LDV_i32_v4_avar;
987 Opcode = NVPTX::LDV_f32_v4_avar;
993 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
994 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
995 getI32Imm(FromTypeWidth, DL), Addr, Chain };
996 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: SelectADDRsi / SelectADDRsi64 match (_asi opcodes).
997 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
998 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
999 switch (N->getOpcode()) {
1002 case NVPTXISD::LoadV2:
1003 switch (EltVT.getSimpleVT().SimpleTy) {
1007 Opcode = NVPTX::LDV_i8_v2_asi;
1010 Opcode = NVPTX::LDV_i16_v2_asi;
1013 Opcode = NVPTX::LDV_i32_v2_asi;
1016 Opcode = NVPTX::LDV_i64_v2_asi;
1019 Opcode = NVPTX::LDV_f32_v2_asi;
1022 Opcode = NVPTX::LDV_f64_v2_asi;
1026 case NVPTXISD::LoadV4:
1027 switch (EltVT.getSimpleVT().SimpleTy) {
1031 Opcode = NVPTX::LDV_i8_v4_asi;
1034 Opcode = NVPTX::LDV_i16_v4_asi;
1037 Opcode = NVPTX::LDV_i32_v4_asi;
1040 Opcode = NVPTX::LDV_f32_v4_asi;
1046 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1047 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1048 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1049 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: SelectADDRri / SelectADDRri64 match (_ari and _ari_64 opcodes).
1050 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1051 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1053 switch (N->getOpcode()) {
1056 case NVPTXISD::LoadV2:
1057 switch (EltVT.getSimpleVT().SimpleTy) {
1061 Opcode = NVPTX::LDV_i8_v2_ari_64;
1064 Opcode = NVPTX::LDV_i16_v2_ari_64;
1067 Opcode = NVPTX::LDV_i32_v2_ari_64;
1070 Opcode = NVPTX::LDV_i64_v2_ari_64;
1073 Opcode = NVPTX::LDV_f32_v2_ari_64;
1076 Opcode = NVPTX::LDV_f64_v2_ari_64;
1080 case NVPTXISD::LoadV4:
1081 switch (EltVT.getSimpleVT().SimpleTy) {
1085 Opcode = NVPTX::LDV_i8_v4_ari_64;
1088 Opcode = NVPTX::LDV_i16_v4_ari_64;
1091 Opcode = NVPTX::LDV_i32_v4_ari_64;
1094 Opcode = NVPTX::LDV_f32_v4_ari_64;
1100 switch (N->getOpcode()) {
1103 case NVPTXISD::LoadV2:
1104 switch (EltVT.getSimpleVT().SimpleTy) {
1108 Opcode = NVPTX::LDV_i8_v2_ari;
1111 Opcode = NVPTX::LDV_i16_v2_ari;
1114 Opcode = NVPTX::LDV_i32_v2_ari;
1117 Opcode = NVPTX::LDV_i64_v2_ari;
1120 Opcode = NVPTX::LDV_f32_v2_ari;
1123 Opcode = NVPTX::LDV_f64_v2_ari;
1127 case NVPTXISD::LoadV4:
1128 switch (EltVT.getSimpleVT().SimpleTy) {
1132 Opcode = NVPTX::LDV_i8_v4_ari;
1135 Opcode = NVPTX::LDV_i16_v4_ari;
1138 Opcode = NVPTX::LDV_i32_v4_ari;
1141 Opcode = NVPTX::LDV_f32_v4_ari;
1148 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1149 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1150 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1152 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4: _areg and _areg_64 opcodes; the address operand is Op1 itself.
1155 switch (N->getOpcode()) {
1158 case NVPTXISD::LoadV2:
1159 switch (EltVT.getSimpleVT().SimpleTy) {
1163 Opcode = NVPTX::LDV_i8_v2_areg_64;
1166 Opcode = NVPTX::LDV_i16_v2_areg_64;
1169 Opcode = NVPTX::LDV_i32_v2_areg_64;
1172 Opcode = NVPTX::LDV_i64_v2_areg_64;
1175 Opcode = NVPTX::LDV_f32_v2_areg_64;
1178 Opcode = NVPTX::LDV_f64_v2_areg_64;
1182 case NVPTXISD::LoadV4:
1183 switch (EltVT.getSimpleVT().SimpleTy) {
1187 Opcode = NVPTX::LDV_i8_v4_areg_64;
1190 Opcode = NVPTX::LDV_i16_v4_areg_64;
1193 Opcode = NVPTX::LDV_i32_v4_areg_64;
1196 Opcode = NVPTX::LDV_f32_v4_areg_64;
1202 switch (N->getOpcode()) {
1205 case NVPTXISD::LoadV2:
1206 switch (EltVT.getSimpleVT().SimpleTy) {
1210 Opcode = NVPTX::LDV_i8_v2_areg;
1213 Opcode = NVPTX::LDV_i16_v2_areg;
1216 Opcode = NVPTX::LDV_i32_v2_areg;
1219 Opcode = NVPTX::LDV_i64_v2_areg;
1222 Opcode = NVPTX::LDV_f32_v2_areg;
1225 Opcode = NVPTX::LDV_f64_v2_areg;
1229 case NVPTXISD::LoadV4:
1230 switch (EltVT.getSimpleVT().SimpleTy) {
1234 Opcode = NVPTX::LDV_i8_v4_areg;
1237 Opcode = NVPTX::LDV_i16_v4_areg;
1240 Opcode = NVPTX::LDV_i32_v4_areg;
1243 Opcode = NVPTX::LDV_f32_v4_areg;
1250 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1251 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1252 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1253 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Copy N's memory operand onto the new machine node.
1256 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1257 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1258 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLDGLDU - Select a machine node for an LDG/LDU load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_* / nvvm_ldu_global_* intrinsics, or an NVPTXISD vector
/// load node (LDGV2/LDUV2/LDGV4/LDUV4; some of the switches below also list
/// LoadV2/LoadV4). The target opcode is looked up from the node opcode and the
/// scalar element type of the memory VT, once per addressing form tried on the
/// address operand (avar, ari/ari64, areg/areg64). The memory operand of \p N
/// is attached to the machine node that gets created.
1263 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1265 SDValue Chain = N->getOperand(0);
1270 // If this is an LDG intrinsic, the address is the third operand. If it's an
1271 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1272 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1273 Op1 = N->getOperand(2);
1274 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an intrinsic node holds the intrinsic ID.
1275 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
// LDG flavours of the intrinsic.
1279 case Intrinsic::nvvm_ldg_global_f:
1280 case Intrinsic::nvvm_ldg_global_i:
1281 case Intrinsic::nvvm_ldg_global_p:
// LDU flavours of the intrinsic.
1284 case Intrinsic::nvvm_ldu_global_f:
1285 case Intrinsic::nvvm_ldu_global_i:
1286 case Intrinsic::nvvm_ldu_global_p:
1291 Op1 = N->getOperand(1);
1292 Mem = cast<MemSDNode>(N);
1298 SDValue Base, Offset, Addr;
// The opcode tables are keyed on a scalar type, so reduce a vector memory VT
// to its element type first.
1300 EVT EltVT = Mem->getMemoryVT();
1301 if (EltVT.isVector()) {
1302 EltVT = EltVT.getVectorElementType();
// First form: SelectDirectAddr matched; the *avar opcodes are used and Addr is
// the only address operand.
1305 if (SelectDirectAddr(Op1, Addr)) {
1306 switch (N->getOpcode()) {
1309 case ISD::INTRINSIC_W_CHAIN:
// Scalar LDG opcodes.
1311 switch (EltVT.getSimpleVT().SimpleTy) {
1315 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1318 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1321 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1324 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1327 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1330 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
// Scalar LDU opcodes.
1334 switch (EltVT.getSimpleVT().SimpleTy) {
1338 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1341 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1344 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1347 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1350 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1353 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
// NOTE(review): this avar switch lists only LDGV2/LDUV2/LDGV4/LDUV4, whereas
// the ari/areg switches further down also accept LoadV2/LoadV4 next to
// LDGV2/LDGV4 -- confirm that the difference is intended.
1358 case NVPTXISD::LDGV2:
1359 switch (EltVT.getSimpleVT().SimpleTy) {
1363 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1366 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1369 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1372 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1375 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1378 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1382 case NVPTXISD::LDUV2:
1383 switch (EltVT.getSimpleVT().SimpleTy) {
1387 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1390 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1393 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1396 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1399 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1402 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
// The four-element tables in this function have no i64/f64 entries.
1406 case NVPTXISD::LDGV4:
1407 switch (EltVT.getSimpleVT().SimpleTy) {
1411 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1414 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1417 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1420 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1424 case NVPTXISD::LDUV4:
1425 switch (EltVT.getSimpleVT().SimpleTy) {
1429 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1432 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1435 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1438 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1444 SDValue Ops[] = { Addr, Chain };
1445 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1446 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1447 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
// Second form: SelectADDRri/SelectADDRri64 (chosen by TM.is64Bit()) split the
// address into Base and Offset. The ari64 opcode tables come first, followed
// by the ari/ari32 tables.
1449 switch (N->getOpcode()) {
1453 case ISD::INTRINSIC_W_CHAIN:
1455 switch (EltVT.getSimpleVT().SimpleTy) {
1459 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1462 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1465 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1468 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1471 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1474 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1478 switch (EltVT.getSimpleVT().SimpleTy) {
1482 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1485 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1488 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1491 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1494 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1497 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1502 case NVPTXISD::LoadV2:
1503 case NVPTXISD::LDGV2:
1504 switch (EltVT.getSimpleVT().SimpleTy) {
1508 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1511 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1514 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1517 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1520 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1523 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1527 case NVPTXISD::LDUV2:
1528 switch (EltVT.getSimpleVT().SimpleTy) {
1532 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1535 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1538 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1541 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1544 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1547 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1551 case NVPTXISD::LoadV4:
1552 case NVPTXISD::LDGV4:
1553 switch (EltVT.getSimpleVT().SimpleTy) {
1557 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1560 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1563 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1566 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1570 case NVPTXISD::LDUV4:
1571 switch (EltVT.getSimpleVT().SimpleTy) {
1575 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1578 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1581 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1584 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
// Same lookup with the ari (scalar) / ari32 (vector) opcode names.
1590 switch (N->getOpcode()) {
1594 case ISD::INTRINSIC_W_CHAIN:
1596 switch (EltVT.getSimpleVT().SimpleTy) {
1600 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1603 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1606 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1609 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1612 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1615 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1619 switch (EltVT.getSimpleVT().SimpleTy) {
1623 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1626 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1629 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1632 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1635 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1638 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1643 case NVPTXISD::LoadV2:
1644 case NVPTXISD::LDGV2:
1645 switch (EltVT.getSimpleVT().SimpleTy) {
1649 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1652 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1655 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1658 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1661 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1664 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1668 case NVPTXISD::LDUV2:
1669 switch (EltVT.getSimpleVT().SimpleTy) {
1673 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1676 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1679 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1682 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1685 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1688 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1692 case NVPTXISD::LoadV4:
1693 case NVPTXISD::LDGV4:
1694 switch (EltVT.getSimpleVT().SimpleTy) {
1698 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1701 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1704 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1707 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1711 case NVPTXISD::LDUV4:
1712 switch (EltVT.getSimpleVT().SimpleTy) {
1716 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1719 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1722 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1725 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1732 SDValue Ops[] = { Base, Offset, Chain };
1734 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Third form: the areg64 tables, then the areg/areg32 tables. The operand
// list built after them passes Op1 itself as the address.
1737 switch (N->getOpcode()) {
1741 case ISD::INTRINSIC_W_CHAIN:
1743 switch (EltVT.getSimpleVT().SimpleTy) {
1747 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1750 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1753 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1756 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1759 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1762 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1766 switch (EltVT.getSimpleVT().SimpleTy) {
1770 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1773 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1776 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1779 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1782 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1785 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1790 case NVPTXISD::LoadV2:
1791 case NVPTXISD::LDGV2:
1792 switch (EltVT.getSimpleVT().SimpleTy) {
1796 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1799 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1802 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1805 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1808 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1811 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1815 case NVPTXISD::LDUV2:
1816 switch (EltVT.getSimpleVT().SimpleTy) {
1820 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1823 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1826 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1829 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1832 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1835 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1839 case NVPTXISD::LoadV4:
1840 case NVPTXISD::LDGV4:
1841 switch (EltVT.getSimpleVT().SimpleTy) {
1845 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1848 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1851 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1854 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1858 case NVPTXISD::LDUV4:
1859 switch (EltVT.getSimpleVT().SimpleTy) {
1863 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1866 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1869 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1872 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
// Same lookup with the areg (scalar) / areg32 (vector) opcode names.
1878 switch (N->getOpcode()) {
1882 case ISD::INTRINSIC_W_CHAIN:
1884 switch (EltVT.getSimpleVT().SimpleTy) {
1888 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1891 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1894 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1897 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1900 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1903 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1907 switch (EltVT.getSimpleVT().SimpleTy) {
1911 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1914 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1917 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1920 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1923 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1926 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1931 case NVPTXISD::LoadV2:
1932 case NVPTXISD::LDGV2:
1933 switch (EltVT.getSimpleVT().SimpleTy) {
1937 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1940 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1943 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1946 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1949 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1952 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1956 case NVPTXISD::LDUV2:
1957 switch (EltVT.getSimpleVT().SimpleTy) {
1961 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1964 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1967 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1970 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1973 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1976 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1980 case NVPTXISD::LoadV4:
1981 case NVPTXISD::LDGV4:
1982 switch (EltVT.getSimpleVT().SimpleTy) {
1986 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1989 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1992 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1995 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1999 case NVPTXISD::LDUV4:
2000 switch (EltVT.getSimpleVT().SimpleTy) {
2004 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2007 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2010 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2013 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2020 SDValue Ops[] = { Op1, Chain };
2021 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the memory operand of the original node over to the machine node.
2024 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2025 MemRefs0[0] = Mem->getMemOperand();
2026 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStore - Select an NVPTX ST_* machine node for a StoreSDNode.
///
/// The volatile flag, code address space, vector type, destination type and
/// destination type width are passed to the machine node as i32 immediates,
/// placed after the stored value and before the address operand(s). The
/// address operand is matched against several addressing forms in turn (avar,
/// asi, ari/ari_64, areg/areg_64), each with its own opcode table. The memory
/// operand of \p N is attached to the node that gets created.
2031 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2033 StoreSDNode *ST = cast<StoreSDNode>(N);
2034 EVT StoreVT = ST->getMemoryVT();
2035 SDNode *NVPTXST = nullptr;
2037 // do not support pre/post inc/dec
2038 if (ST->isIndexed())
// Only simple memory value types are handled; getSimpleVT() is called on
// StoreVT below.
2041 if (!StoreVT.isSimple())
2044 // Address Space Setting
2045 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2048 // - .volatile is only available for .global and .shared
// (the condition below additionally tests for the GENERIC address space)
2049 bool isVolatile = ST->isVolatile();
2050 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2051 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2052 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector type immediate: Scalar unless the memory VT is a vector, in which
// case it becomes V2 or V4 depending on the element count.
2056 MVT SimpleVT = StoreVT.getSimpleVT();
2057 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2058 if (SimpleVT.isVector()) {
2059 unsigned num = SimpleVT.getVectorNumElements();
2061 vecType = NVPTX::PTXLdStInstCode::V2;
2063 vecType = NVPTX::PTXLdStInstCode::V4;
2068 // Type Setting: toType + toTypeWidth
2069 // - for integer type, always use 'u'
2071 MVT ScalarVT = SimpleVT.getScalarType();
2072 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2073 unsigned int toType;
2074 if (ScalarVT.isFloatingPoint())
2075 toType = NVPTX::PTXLdStInstCode::Float;
2077 toType = NVPTX::PTXLdStInstCode::Unsigned;
2079 // Create the machine instruction DAG
// Operand 0 is the chain, operand 1 the value being stored, operand 2 the
// address.
2080 SDValue Chain = N->getOperand(0);
2081 SDValue N1 = N->getOperand(1);
2082 SDValue N2 = N->getOperand(2);
2084 SDValue Offset, Base;
// Value type of the stored value (as opposed to the memory VT above);
// presumably what the opcode tables below switch on -- TODO confirm.
2086 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// SelectDirectAddr form: ST_*_avar opcodes, Addr is the address operand.
2088 if (SelectDirectAddr(N2, Addr)) {
2091 Opcode = NVPTX::ST_i8_avar;
2094 Opcode = NVPTX::ST_i16_avar;
2097 Opcode = NVPTX::ST_i32_avar;
2100 Opcode = NVPTX::ST_i64_avar;
2103 Opcode = NVPTX::ST_f32_avar;
2106 Opcode = NVPTX::ST_f64_avar;
2111 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2112 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2113 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2115 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2116 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2117 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
// SelectADDRsi/SelectADDRsi64 form: ST_*_asi opcodes with Base as an address
// operand.
2120 Opcode = NVPTX::ST_i8_asi;
2123 Opcode = NVPTX::ST_i16_asi;
2126 Opcode = NVPTX::ST_i32_asi;
2129 Opcode = NVPTX::ST_i64_asi;
2132 Opcode = NVPTX::ST_f32_asi;
2135 Opcode = NVPTX::ST_f64_asi;
2140 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2141 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2142 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2144 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2145 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2146 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
// SelectADDRri/SelectADDRri64 form: the ST_*_ari_64 table, then the ST_*_ari
// table.
2150 Opcode = NVPTX::ST_i8_ari_64;
2153 Opcode = NVPTX::ST_i16_ari_64;
2156 Opcode = NVPTX::ST_i32_ari_64;
2159 Opcode = NVPTX::ST_i64_ari_64;
2162 Opcode = NVPTX::ST_f32_ari_64;
2165 Opcode = NVPTX::ST_f64_ari_64;
2173 Opcode = NVPTX::ST_i8_ari;
2176 Opcode = NVPTX::ST_i16_ari;
2179 Opcode = NVPTX::ST_i32_ari;
2182 Opcode = NVPTX::ST_i64_ari;
2185 Opcode = NVPTX::ST_f32_ari;
2188 Opcode = NVPTX::ST_f64_ari;
2194 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2195 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2196 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2198 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Remaining form: the ST_*_areg_64 table, then the ST_*_areg table. The
// operand list below passes N2 itself as the address.
2203 Opcode = NVPTX::ST_i8_areg_64;
2206 Opcode = NVPTX::ST_i16_areg_64;
2209 Opcode = NVPTX::ST_i32_areg_64;
2212 Opcode = NVPTX::ST_i64_areg_64;
2215 Opcode = NVPTX::ST_f32_areg_64;
2218 Opcode = NVPTX::ST_f64_areg_64;
2226 Opcode = NVPTX::ST_i8_areg;
2229 Opcode = NVPTX::ST_i16_areg;
2232 Opcode = NVPTX::ST_i32_areg;
2235 Opcode = NVPTX::ST_i64_areg;
2238 Opcode = NVPTX::ST_f32_areg;
2241 Opcode = NVPTX::ST_f64_areg;
2247 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2248 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2249 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2251 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the memory operand of the original store over to the machine node.
2255 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2256 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2257 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreVector - Select an NVPTX STV_* machine node for an
/// NVPTXISD::StoreV2 or NVPTXISD::StoreV4 node.
///
/// The operand list (StOps) is assembled in this order: the two or four
/// stored values, five i32 immediates (volatile flag, code address space,
/// vector type, destination type, destination type width), the address
/// operand(s) of whichever addressing form matched, and finally the chain.
/// Storing through a pointer in the CONSTANT code address space is reported
/// as a fatal error. The memory operand of \p N is attached to the node that
/// gets created.
2263 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2264 SDValue Chain = N->getOperand(0);
2265 SDValue Op1 = N->getOperand(1);
2266 SDValue Addr, Offset, Base;
// The opcode tables are keyed on the type of the first stored value; the
// memory VT is used for the type/width immediates.
2270 EVT EltVT = Op1.getValueType();
2271 MemSDNode *MemSD = cast<MemSDNode>(N);
2272 EVT StoreVT = MemSD->getMemoryVT();
2274 // Address Space Setting
2275 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2277 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2278 report_fatal_error("Cannot store to pointer that points to constant "
2283 // - .volatile is only available for .global and .shared
// (the condition below additionally tests for the GENERIC address space)
2284 bool IsVolatile = MemSD->isVolatile();
2285 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2286 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2287 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2290 // Type Setting: toType + toTypeWidth
2291 // - for integer type, always use 'u'
2292 assert(StoreVT.isSimple() && "Store value is not simple");
2293 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2294 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2296 if (ScalarVT.isFloatingPoint())
2297 ToType = NVPTX::PTXLdStInstCode::Float;
2299 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2301 SmallVector<SDValue, 12> StOps;
// Collect the stored values (operands 1..2 or 1..4); the operand that follows
// them is the address (N2).
2305 switch (N->getOpcode()) {
2306 case NVPTXISD::StoreV2:
2307 VecType = NVPTX::PTXLdStInstCode::V2;
2308 StOps.push_back(N->getOperand(1));
2309 StOps.push_back(N->getOperand(2));
2310 N2 = N->getOperand(3);
2312 case NVPTXISD::StoreV4:
2313 VecType = NVPTX::PTXLdStInstCode::V4;
2314 StOps.push_back(N->getOperand(1));
2315 StOps.push_back(N->getOperand(2));
2316 StOps.push_back(N->getOperand(3));
2317 StOps.push_back(N->getOperand(4));
2318 N2 = N->getOperand(5);
// Immediate operands shared by every addressing form.
2324 StOps.push_back(getI32Imm(IsVolatile, DL));
2325 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2326 StOps.push_back(getI32Imm(VecType, DL));
2327 StOps.push_back(getI32Imm(ToType, DL));
2328 StOps.push_back(getI32Imm(ToTypeWidth, DL));
// SelectDirectAddr form: STV_*_avar opcodes, Addr appended as the address.
2330 if (SelectDirectAddr(N2, Addr)) {
2331 switch (N->getOpcode()) {
2334 case NVPTXISD::StoreV2:
2335 switch (EltVT.getSimpleVT().SimpleTy) {
2339 Opcode = NVPTX::STV_i8_v2_avar;
2342 Opcode = NVPTX::STV_i16_v2_avar;
2345 Opcode = NVPTX::STV_i32_v2_avar;
2348 Opcode = NVPTX::STV_i64_v2_avar;
2351 Opcode = NVPTX::STV_f32_v2_avar;
2354 Opcode = NVPTX::STV_f64_v2_avar;
// The StoreV4 tables in this function have no i64/f64 entries.
2358 case NVPTXISD::StoreV4:
2359 switch (EltVT.getSimpleVT().SimpleTy) {
2363 Opcode = NVPTX::STV_i8_v4_avar;
2366 Opcode = NVPTX::STV_i16_v4_avar;
2369 Opcode = NVPTX::STV_i32_v4_avar;
2372 Opcode = NVPTX::STV_f32_v4_avar;
2377 StOps.push_back(Addr);
2378 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2379 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
// SelectADDRsi/SelectADDRsi64 form: STV_*_asi opcodes, Base and Offset
// appended as the address.
2380 switch (N->getOpcode()) {
2383 case NVPTXISD::StoreV2:
2384 switch (EltVT.getSimpleVT().SimpleTy) {
2388 Opcode = NVPTX::STV_i8_v2_asi;
2391 Opcode = NVPTX::STV_i16_v2_asi;
2394 Opcode = NVPTX::STV_i32_v2_asi;
2397 Opcode = NVPTX::STV_i64_v2_asi;
2400 Opcode = NVPTX::STV_f32_v2_asi;
2403 Opcode = NVPTX::STV_f64_v2_asi;
2407 case NVPTXISD::StoreV4:
2408 switch (EltVT.getSimpleVT().SimpleTy) {
2412 Opcode = NVPTX::STV_i8_v4_asi;
2415 Opcode = NVPTX::STV_i16_v4_asi;
2418 Opcode = NVPTX::STV_i32_v4_asi;
2421 Opcode = NVPTX::STV_f32_v4_asi;
2426 StOps.push_back(Base);
2427 StOps.push_back(Offset);
2428 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2429 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
// SelectADDRri/SelectADDRri64 form: the STV_*_ari_64 tables, then the
// STV_*_ari tables; Base and Offset are appended as the address.
2431 switch (N->getOpcode()) {
2434 case NVPTXISD::StoreV2:
2435 switch (EltVT.getSimpleVT().SimpleTy) {
2439 Opcode = NVPTX::STV_i8_v2_ari_64;
2442 Opcode = NVPTX::STV_i16_v2_ari_64;
2445 Opcode = NVPTX::STV_i32_v2_ari_64;
2448 Opcode = NVPTX::STV_i64_v2_ari_64;
2451 Opcode = NVPTX::STV_f32_v2_ari_64;
2454 Opcode = NVPTX::STV_f64_v2_ari_64;
2458 case NVPTXISD::StoreV4:
2459 switch (EltVT.getSimpleVT().SimpleTy) {
2463 Opcode = NVPTX::STV_i8_v4_ari_64;
2466 Opcode = NVPTX::STV_i16_v4_ari_64;
2469 Opcode = NVPTX::STV_i32_v4_ari_64;
2472 Opcode = NVPTX::STV_f32_v4_ari_64;
2478 switch (N->getOpcode()) {
2481 case NVPTXISD::StoreV2:
2482 switch (EltVT.getSimpleVT().SimpleTy) {
2486 Opcode = NVPTX::STV_i8_v2_ari;
2489 Opcode = NVPTX::STV_i16_v2_ari;
2492 Opcode = NVPTX::STV_i32_v2_ari;
2495 Opcode = NVPTX::STV_i64_v2_ari;
2498 Opcode = NVPTX::STV_f32_v2_ari;
2501 Opcode = NVPTX::STV_f64_v2_ari;
2505 case NVPTXISD::StoreV4:
2506 switch (EltVT.getSimpleVT().SimpleTy) {
2510 Opcode = NVPTX::STV_i8_v4_ari;
2513 Opcode = NVPTX::STV_i16_v4_ari;
2516 Opcode = NVPTX::STV_i32_v4_ari;
2519 Opcode = NVPTX::STV_f32_v4_ari;
2525 StOps.push_back(Base);
2526 StOps.push_back(Offset);
// Remaining form: the STV_*_areg_64 tables, then the STV_*_areg tables; N2
// itself is appended as the address.
2529 switch (N->getOpcode()) {
2532 case NVPTXISD::StoreV2:
2533 switch (EltVT.getSimpleVT().SimpleTy) {
2537 Opcode = NVPTX::STV_i8_v2_areg_64;
2540 Opcode = NVPTX::STV_i16_v2_areg_64;
2543 Opcode = NVPTX::STV_i32_v2_areg_64;
2546 Opcode = NVPTX::STV_i64_v2_areg_64;
2549 Opcode = NVPTX::STV_f32_v2_areg_64;
2552 Opcode = NVPTX::STV_f64_v2_areg_64;
2556 case NVPTXISD::StoreV4:
2557 switch (EltVT.getSimpleVT().SimpleTy) {
2561 Opcode = NVPTX::STV_i8_v4_areg_64;
2564 Opcode = NVPTX::STV_i16_v4_areg_64;
2567 Opcode = NVPTX::STV_i32_v4_areg_64;
2570 Opcode = NVPTX::STV_f32_v4_areg_64;
2576 switch (N->getOpcode()) {
2579 case NVPTXISD::StoreV2:
2580 switch (EltVT.getSimpleVT().SimpleTy) {
2584 Opcode = NVPTX::STV_i8_v2_areg;
2587 Opcode = NVPTX::STV_i16_v2_areg;
2590 Opcode = NVPTX::STV_i32_v2_areg;
2593 Opcode = NVPTX::STV_i64_v2_areg;
2596 Opcode = NVPTX::STV_f32_v2_areg;
2599 Opcode = NVPTX::STV_f64_v2_areg;
2603 case NVPTXISD::StoreV4:
2604 switch (EltVT.getSimpleVT().SimpleTy) {
2608 Opcode = NVPTX::STV_i8_v4_areg;
2611 Opcode = NVPTX::STV_i16_v4_areg;
2614 Opcode = NVPTX::STV_i32_v4_areg;
2617 Opcode = NVPTX::STV_f32_v4_areg;
2623 StOps.push_back(N2);
// The chain is always the last operand.
2626 StOps.push_back(Chain);
2628 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the memory operand of the original node over to the machine node.
2630 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2631 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2632 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLoadParam - Select a LoadParamMem* machine node for an
/// NVPTXISD::LoadParam, LoadParamV2 or LoadParamV4 node.
///
/// Operand 0 of \p Node is the chain, operand 2 a constant offset and
/// operand 3 the glue ("Flag") value. The opcode is looked up from the memory
/// VT. The result list has one, two or four values of the node's first result
/// type, followed by MVT::Other and MVT::Glue.
///
/// \returns the machine node created from the offset (as an i32 target
/// constant), the chain and the glue operand.
2637 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2638 SDValue Chain = Node->getOperand(0);
2639 SDValue Offset = Node->getOperand(2);
2640 SDValue Flag = Node->getOperand(3);
2642 MemSDNode *Mem = cast<MemSDNode>(Node);
2645 switch (Node->getOpcode()) {
2648 case NVPTXISD::LoadParam:
2651 case NVPTXISD::LoadParamV2:
2654 case NVPTXISD::LoadParamV4:
// EltVT is the type of the produced values; MemVT selects the opcode.
2659 EVT EltVT = Node->getValueType(0);
2660 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes. Two entries map to the I8 opcode in each table here.
2668 switch (MemVT.getSimpleVT().SimpleTy) {
2672 Opc = NVPTX::LoadParamMemI8;
2675 Opc = NVPTX::LoadParamMemI8;
2678 Opc = NVPTX::LoadParamMemI16;
2681 Opc = NVPTX::LoadParamMemI32;
2684 Opc = NVPTX::LoadParamMemI64;
2687 Opc = NVPTX::LoadParamMemF32;
2690 Opc = NVPTX::LoadParamMemF64;
// Two-element opcodes.
2695 switch (MemVT.getSimpleVT().SimpleTy) {
2699 Opc = NVPTX::LoadParamMemV2I8;
2702 Opc = NVPTX::LoadParamMemV2I8;
2705 Opc = NVPTX::LoadParamMemV2I16;
2708 Opc = NVPTX::LoadParamMemV2I32;
2711 Opc = NVPTX::LoadParamMemV2I64;
2714 Opc = NVPTX::LoadParamMemV2F32;
2717 Opc = NVPTX::LoadParamMemV2F64;
// Four-element opcodes (no I64/F64 entries).
2722 switch (MemVT.getSimpleVT().SimpleTy) {
2726 Opc = NVPTX::LoadParamMemV4I8;
2729 Opc = NVPTX::LoadParamMemV4I8;
2732 Opc = NVPTX::LoadParamMemV4I16;
2735 Opc = NVPTX::LoadParamMemV4I32;
2738 Opc = NVPTX::LoadParamMemV4F32;
// Result types: one EltVT per loaded element, then chain and glue.
2746 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2747 } else if (VecSize == 2) {
2748 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2750 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2751 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a ConstantSDNode; it is re-emitted as an i32
// target constant.
2754 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2756 SmallVector<SDValue, 2> Ops;
2757 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2758 Ops.push_back(Chain);
2759 Ops.push_back(Flag);
2761 return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// SelectStoreRetval - Select a StoreRetval* machine node for an
/// NVPTXISD::StoreRetval, StoreRetvalV2 or StoreRetvalV4 node.
///
/// Operand 0 of \p N is the chain and operand 1 a constant offset; the values
/// to store start at operand 2. The machine node takes the values, then the
/// offset as an i32 target constant, then the chain. The opcode is looked up
/// from the memory VT, and the memory operand of \p N is attached to the node
/// that gets created.
2764 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2766 SDValue Chain = N->getOperand(0);
2767 SDValue Offset = N->getOperand(1);
2768 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2769 MemSDNode *Mem = cast<MemSDNode>(N);
2771 // How many elements do we have?
2772 unsigned NumElts = 1;
2773 switch (N->getOpcode()) {
2776 case NVPTXISD::StoreRetval:
2779 case NVPTXISD::StoreRetvalV2:
2782 case NVPTXISD::StoreRetvalV4:
2787 // Build vector of operands
2788 SmallVector<SDValue, 6> Ops;
// Stored values occupy operands 2 .. NumElts + 1 of N.
2789 for (unsigned i = 0; i < NumElts; ++i)
2790 Ops.push_back(N->getOperand(i + 2));
2791 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2792 Ops.push_back(Chain);
2794 // Determine target opcode
2795 // If we have an i1, use an 8-bit store. The lowering code in
2796 // NVPTXISelLowering will have already emitted an upcast.
2797 unsigned Opcode = 0;
// Scalar opcodes.
2802 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2806 Opcode = NVPTX::StoreRetvalI8;
2809 Opcode = NVPTX::StoreRetvalI8;
2812 Opcode = NVPTX::StoreRetvalI16;
2815 Opcode = NVPTX::StoreRetvalI32;
2818 Opcode = NVPTX::StoreRetvalI64;
2821 Opcode = NVPTX::StoreRetvalF32;
2824 Opcode = NVPTX::StoreRetvalF64;
// Two-element opcodes.
2829 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2833 Opcode = NVPTX::StoreRetvalV2I8;
2836 Opcode = NVPTX::StoreRetvalV2I8;
2839 Opcode = NVPTX::StoreRetvalV2I16;
2842 Opcode = NVPTX::StoreRetvalV2I32;
2845 Opcode = NVPTX::StoreRetvalV2I64;
2848 Opcode = NVPTX::StoreRetvalV2F32;
2851 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element opcodes (no I64/F64 entries).
2856 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2860 Opcode = NVPTX::StoreRetvalV4I8;
2863 Opcode = NVPTX::StoreRetvalV4I8;
2866 Opcode = NVPTX::StoreRetvalV4I16;
2869 Opcode = NVPTX::StoreRetvalV4I32;
2872 Opcode = NVPTX::StoreRetvalV4F32;
2879 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the memory operand of the original node over to the machine node.
2880 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2881 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2882 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreParam - Select a StoreParam* machine node for an
/// NVPTXISD::StoreParam, StoreParamV2, StoreParamV4, StoreParamU32 or
/// StoreParamS32 node.
///
/// Operand 0 of \p N is the chain, operand 1 a constant parameter value,
/// operand 2 a constant offset, and the last operand the glue ("Flag") value;
/// the values to store start at operand 3. The machine node takes the values,
/// the parameter and offset as i32 target constants, the chain and the glue,
/// and produces MVT::Other and MVT::Glue results. For StoreParamU32 and
/// StoreParamS32 the first value is first routed through a CVT_u32_u16 /
/// CVT_s32_s16 node. The memory operand of \p N is attached to the node that
/// gets created.
2887 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2889 SDValue Chain = N->getOperand(0);
2890 SDValue Param = N->getOperand(1);
2891 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2892 SDValue Offset = N->getOperand(2);
2893 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2894 MemSDNode *Mem = cast<MemSDNode>(N);
// The glue value is always the final operand, whatever the element count.
2895 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2897 // How many elements do we have?
2898 unsigned NumElts = 1;
2899 switch (N->getOpcode()) {
2902 case NVPTXISD::StoreParamU32:
2903 case NVPTXISD::StoreParamS32:
2904 case NVPTXISD::StoreParam:
2907 case NVPTXISD::StoreParamV2:
2910 case NVPTXISD::StoreParamV4:
2915 // Build vector of operands
2916 SmallVector<SDValue, 8> Ops;
// Stored values occupy operands 3 .. NumElts + 2 of N.
2917 for (unsigned i = 0; i < NumElts; ++i)
2918 Ops.push_back(N->getOperand(i + 3));
2919 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2920 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2921 Ops.push_back(Chain);
2922 Ops.push_back(Flag);
2924 // Determine target opcode
2925 // If we have an i1, use an 8-bit store. The lowering code in
2926 // NVPTXISelLowering will have already emitted an upcast.
2927 unsigned Opcode = 0;
2928 switch (N->getOpcode()) {
// Scalar opcodes.
2934 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2938 Opcode = NVPTX::StoreParamI8;
2941 Opcode = NVPTX::StoreParamI8;
2944 Opcode = NVPTX::StoreParamI16;
2947 Opcode = NVPTX::StoreParamI32;
2950 Opcode = NVPTX::StoreParamI64;
2953 Opcode = NVPTX::StoreParamF32;
2956 Opcode = NVPTX::StoreParamF64;
// Two-element opcodes.
2961 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2965 Opcode = NVPTX::StoreParamV2I8;
2968 Opcode = NVPTX::StoreParamV2I8;
2971 Opcode = NVPTX::StoreParamV2I16;
2974 Opcode = NVPTX::StoreParamV2I32;
2977 Opcode = NVPTX::StoreParamV2I64;
2980 Opcode = NVPTX::StoreParamV2F32;
2983 Opcode = NVPTX::StoreParamV2F64;
// Four-element opcodes (no I64/F64 entries).
2988 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2992 Opcode = NVPTX::StoreParamV4I8;
2995 Opcode = NVPTX::StoreParamV4I8;
2998 Opcode = NVPTX::StoreParamV4I16;
3001 Opcode = NVPTX::StoreParamV4I32;
3004 Opcode = NVPTX::StoreParamV4F32;
3010 // Special case: if we have a sign-extend/zero-extend node, insert the
3011 // conversion instruction first, and use that as the value operand to
3012 // the selected StoreParam node.
// StoreParamU32: CVT_u32_u16 with conversion mode NONE, then a 32-bit store.
3013 case NVPTXISD::StoreParamU32: {
3014 Opcode = NVPTX::StoreParamI32;
3015 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3017 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3018 MVT::i32, Ops[0], CvtNone);
3019 Ops[0] = SDValue(Cvt, 0);
// StoreParamS32: CVT_s32_s16 with conversion mode NONE, then a 32-bit store.
3022 case NVPTXISD::StoreParamS32: {
3023 Opcode = NVPTX::StoreParamI32;
3024 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3026 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3027 MVT::i32, Ops[0], CvtNone);
3028 Ops[0] = SDValue(Cvt, 0);
// The selected node produces a chain and a glue result.
3033 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3035 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the memory operand of the original node over to the machine node.
3036 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3037 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3038 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3043 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3044 SDValue Chain = N->getOperand(0);
3045 SDNode *Ret = nullptr;
3047 SmallVector<SDValue, 8> Ops;
3049 switch (N->getOpcode()) {
3050 default: return nullptr;
3051 case NVPTXISD::Tex1DFloatS32:
3052 Opc = NVPTX::TEX_1D_F32_S32;
3054 case NVPTXISD::Tex1DFloatFloat:
3055 Opc = NVPTX::TEX_1D_F32_F32;
3057 case NVPTXISD::Tex1DFloatFloatLevel:
3058 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3060 case NVPTXISD::Tex1DFloatFloatGrad:
3061 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3063 case NVPTXISD::Tex1DS32S32:
3064 Opc = NVPTX::TEX_1D_S32_S32;
3066 case NVPTXISD::Tex1DS32Float:
3067 Opc = NVPTX::TEX_1D_S32_F32;
3069 case NVPTXISD::Tex1DS32FloatLevel:
3070 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3072 case NVPTXISD::Tex1DS32FloatGrad:
3073 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3075 case NVPTXISD::Tex1DU32S32:
3076 Opc = NVPTX::TEX_1D_U32_S32;
3078 case NVPTXISD::Tex1DU32Float:
3079 Opc = NVPTX::TEX_1D_U32_F32;
3081 case NVPTXISD::Tex1DU32FloatLevel:
3082 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3084 case NVPTXISD::Tex1DU32FloatGrad:
3085 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3087 case NVPTXISD::Tex1DArrayFloatS32:
3088 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3090 case NVPTXISD::Tex1DArrayFloatFloat:
3091 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3093 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3094 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3096 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3097 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3099 case NVPTXISD::Tex1DArrayS32S32:
3100 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3102 case NVPTXISD::Tex1DArrayS32Float:
3103 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3105 case NVPTXISD::Tex1DArrayS32FloatLevel:
3106 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3108 case NVPTXISD::Tex1DArrayS32FloatGrad:
3109 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3111 case NVPTXISD::Tex1DArrayU32S32:
3112 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3114 case NVPTXISD::Tex1DArrayU32Float:
3115 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3117 case NVPTXISD::Tex1DArrayU32FloatLevel:
3118 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3120 case NVPTXISD::Tex1DArrayU32FloatGrad:
3121 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3123 case NVPTXISD::Tex2DFloatS32:
3124 Opc = NVPTX::TEX_2D_F32_S32;
3126 case NVPTXISD::Tex2DFloatFloat:
3127 Opc = NVPTX::TEX_2D_F32_F32;
3129 case NVPTXISD::Tex2DFloatFloatLevel:
3130 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3132 case NVPTXISD::Tex2DFloatFloatGrad:
3133 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3135 case NVPTXISD::Tex2DS32S32:
3136 Opc = NVPTX::TEX_2D_S32_S32;
3138 case NVPTXISD::Tex2DS32Float:
3139 Opc = NVPTX::TEX_2D_S32_F32;
3141 case NVPTXISD::Tex2DS32FloatLevel:
3142 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3144 case NVPTXISD::Tex2DS32FloatGrad:
3145 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3147 case NVPTXISD::Tex2DU32S32:
3148 Opc = NVPTX::TEX_2D_U32_S32;
3150 case NVPTXISD::Tex2DU32Float:
3151 Opc = NVPTX::TEX_2D_U32_F32;
3153 case NVPTXISD::Tex2DU32FloatLevel:
3154 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3156 case NVPTXISD::Tex2DU32FloatGrad:
3157 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3159 case NVPTXISD::Tex2DArrayFloatS32:
3160 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3162 case NVPTXISD::Tex2DArrayFloatFloat:
3163 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3165 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3166 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3168 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3169 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3171 case NVPTXISD::Tex2DArrayS32S32:
3172 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3174 case NVPTXISD::Tex2DArrayS32Float:
3175 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3177 case NVPTXISD::Tex2DArrayS32FloatLevel:
3178 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3180 case NVPTXISD::Tex2DArrayS32FloatGrad:
3181 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3183 case NVPTXISD::Tex2DArrayU32S32:
3184 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3186 case NVPTXISD::Tex2DArrayU32Float:
3187 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3189 case NVPTXISD::Tex2DArrayU32FloatLevel:
3190 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3192 case NVPTXISD::Tex2DArrayU32FloatGrad:
3193 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3195 case NVPTXISD::Tex3DFloatS32:
3196 Opc = NVPTX::TEX_3D_F32_S32;
3198 case NVPTXISD::Tex3DFloatFloat:
3199 Opc = NVPTX::TEX_3D_F32_F32;
3201 case NVPTXISD::Tex3DFloatFloatLevel:
3202 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3204 case NVPTXISD::Tex3DFloatFloatGrad:
3205 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3207 case NVPTXISD::Tex3DS32S32:
3208 Opc = NVPTX::TEX_3D_S32_S32;
3210 case NVPTXISD::Tex3DS32Float:
3211 Opc = NVPTX::TEX_3D_S32_F32;
3213 case NVPTXISD::Tex3DS32FloatLevel:
3214 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3216 case NVPTXISD::Tex3DS32FloatGrad:
3217 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3219 case NVPTXISD::Tex3DU32S32:
3220 Opc = NVPTX::TEX_3D_U32_S32;
3222 case NVPTXISD::Tex3DU32Float:
3223 Opc = NVPTX::TEX_3D_U32_F32;
3225 case NVPTXISD::Tex3DU32FloatLevel:
3226 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3228 case NVPTXISD::Tex3DU32FloatGrad:
3229 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3231 case NVPTXISD::TexCubeFloatFloat:
3232 Opc = NVPTX::TEX_CUBE_F32_F32;
3234 case NVPTXISD::TexCubeFloatFloatLevel:
3235 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3237 case NVPTXISD::TexCubeS32Float:
3238 Opc = NVPTX::TEX_CUBE_S32_F32;
3240 case NVPTXISD::TexCubeS32FloatLevel:
3241 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3243 case NVPTXISD::TexCubeU32Float:
3244 Opc = NVPTX::TEX_CUBE_U32_F32;
3246 case NVPTXISD::TexCubeU32FloatLevel:
3247 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3249 case NVPTXISD::TexCubeArrayFloatFloat:
3250 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3252 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3253 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3255 case NVPTXISD::TexCubeArrayS32Float:
3256 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3258 case NVPTXISD::TexCubeArrayS32FloatLevel:
3259 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3261 case NVPTXISD::TexCubeArrayU32Float:
3262 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3264 case NVPTXISD::TexCubeArrayU32FloatLevel:
3265 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3267 case NVPTXISD::Tld4R2DFloatFloat:
3268 Opc = NVPTX::TLD4_R_2D_F32_F32;
3270 case NVPTXISD::Tld4G2DFloatFloat:
3271 Opc = NVPTX::TLD4_G_2D_F32_F32;
3273 case NVPTXISD::Tld4B2DFloatFloat:
3274 Opc = NVPTX::TLD4_B_2D_F32_F32;
3276 case NVPTXISD::Tld4A2DFloatFloat:
3277 Opc = NVPTX::TLD4_A_2D_F32_F32;
3279 case NVPTXISD::Tld4R2DS64Float:
3280 Opc = NVPTX::TLD4_R_2D_S32_F32;
3282 case NVPTXISD::Tld4G2DS64Float:
3283 Opc = NVPTX::TLD4_G_2D_S32_F32;
3285 case NVPTXISD::Tld4B2DS64Float:
3286 Opc = NVPTX::TLD4_B_2D_S32_F32;
3288 case NVPTXISD::Tld4A2DS64Float:
3289 Opc = NVPTX::TLD4_A_2D_S32_F32;
3291 case NVPTXISD::Tld4R2DU64Float:
3292 Opc = NVPTX::TLD4_R_2D_U32_F32;
3294 case NVPTXISD::Tld4G2DU64Float:
3295 Opc = NVPTX::TLD4_G_2D_U32_F32;
3297 case NVPTXISD::Tld4B2DU64Float:
3298 Opc = NVPTX::TLD4_B_2D_U32_F32;
3300 case NVPTXISD::Tld4A2DU64Float:
3301 Opc = NVPTX::TLD4_A_2D_U32_F32;
3303 case NVPTXISD::TexUnified1DFloatS32:
3304 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3306 case NVPTXISD::TexUnified1DFloatFloat:
3307 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3309 case NVPTXISD::TexUnified1DFloatFloatLevel:
3310 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3312 case NVPTXISD::TexUnified1DFloatFloatGrad:
3313 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3315 case NVPTXISD::TexUnified1DS32S32:
3316 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3318 case NVPTXISD::TexUnified1DS32Float:
3319 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3321 case NVPTXISD::TexUnified1DS32FloatLevel:
3322 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3324 case NVPTXISD::TexUnified1DS32FloatGrad:
3325 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3327 case NVPTXISD::TexUnified1DU32S32:
3328 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3330 case NVPTXISD::TexUnified1DU32Float:
3331 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3333 case NVPTXISD::TexUnified1DU32FloatLevel:
3334 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3336 case NVPTXISD::TexUnified1DU32FloatGrad:
3337 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3339 case NVPTXISD::TexUnified1DArrayFloatS32:
3340 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3342 case NVPTXISD::TexUnified1DArrayFloatFloat:
3343 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3345 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3346 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3348 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3349 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3351 case NVPTXISD::TexUnified1DArrayS32S32:
3352 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3354 case NVPTXISD::TexUnified1DArrayS32Float:
3355 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3357 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3358 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3360 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3361 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3363 case NVPTXISD::TexUnified1DArrayU32S32:
3364 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3366 case NVPTXISD::TexUnified1DArrayU32Float:
3367 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3369 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3370 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3372 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3373 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3375 case NVPTXISD::TexUnified2DFloatS32:
3376 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3378 case NVPTXISD::TexUnified2DFloatFloat:
3379 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3381 case NVPTXISD::TexUnified2DFloatFloatLevel:
3382 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3384 case NVPTXISD::TexUnified2DFloatFloatGrad:
3385 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3387 case NVPTXISD::TexUnified2DS32S32:
3388 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3390 case NVPTXISD::TexUnified2DS32Float:
3391 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3393 case NVPTXISD::TexUnified2DS32FloatLevel:
3394 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3396 case NVPTXISD::TexUnified2DS32FloatGrad:
3397 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3399 case NVPTXISD::TexUnified2DU32S32:
3400 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3402 case NVPTXISD::TexUnified2DU32Float:
3403 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3405 case NVPTXISD::TexUnified2DU32FloatLevel:
3406 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3408 case NVPTXISD::TexUnified2DU32FloatGrad:
3409 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3411 case NVPTXISD::TexUnified2DArrayFloatS32:
3412 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3414 case NVPTXISD::TexUnified2DArrayFloatFloat:
3415 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3417 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3418 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3420 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3421 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3423 case NVPTXISD::TexUnified2DArrayS32S32:
3424 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3426 case NVPTXISD::TexUnified2DArrayS32Float:
3427 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3429 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3430 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3432 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3433 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3435 case NVPTXISD::TexUnified2DArrayU32S32:
3436 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3438 case NVPTXISD::TexUnified2DArrayU32Float:
3439 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3441 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3442 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3444 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3445 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3447 case NVPTXISD::TexUnified3DFloatS32:
3448 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3450 case NVPTXISD::TexUnified3DFloatFloat:
3451 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3453 case NVPTXISD::TexUnified3DFloatFloatLevel:
3454 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3456 case NVPTXISD::TexUnified3DFloatFloatGrad:
3457 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3459 case NVPTXISD::TexUnified3DS32S32:
3460 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3462 case NVPTXISD::TexUnified3DS32Float:
3463 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3465 case NVPTXISD::TexUnified3DS32FloatLevel:
3466 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3468 case NVPTXISD::TexUnified3DS32FloatGrad:
3469 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3471 case NVPTXISD::TexUnified3DU32S32:
3472 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3474 case NVPTXISD::TexUnified3DU32Float:
3475 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3477 case NVPTXISD::TexUnified3DU32FloatLevel:
3478 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3480 case NVPTXISD::TexUnified3DU32FloatGrad:
3481 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3483 case NVPTXISD::TexUnifiedCubeFloatFloat:
3484 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3486 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3487 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3489 case NVPTXISD::TexUnifiedCubeS32Float:
3490 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3492 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3493 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3495 case NVPTXISD::TexUnifiedCubeU32Float:
3496 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3498 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3499 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3501 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3502 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3504 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3505 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3507 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3508 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3510 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3511 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3513 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3514 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3516 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3517 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3519 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3520 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3522 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3523 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3525 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3526 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3528 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3529 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3531 case NVPTXISD::Tld4UnifiedR2DS64Float:
3532 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3534 case NVPTXISD::Tld4UnifiedG2DS64Float:
3535 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3537 case NVPTXISD::Tld4UnifiedB2DS64Float:
3538 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3540 case NVPTXISD::Tld4UnifiedA2DS64Float:
3541 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3543 case NVPTXISD::Tld4UnifiedR2DU64Float:
3544 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3546 case NVPTXISD::Tld4UnifiedG2DU64Float:
3547 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3549 case NVPTXISD::Tld4UnifiedB2DU64Float:
3550 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3552 case NVPTXISD::Tld4UnifiedA2DU64Float:
3553 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3557 // Copy over operands
3558 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3559 Ops.push_back(N->getOperand(i));
3562 Ops.push_back(Chain);
3563 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3567 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3568 SDValue Chain = N->getOperand(0);
3569 SDValue TexHandle = N->getOperand(1);
3570 SDNode *Ret = nullptr;
3572 SmallVector<SDValue, 8> Ops;
3573 switch (N->getOpcode()) {
3574 default: return nullptr;
3575 case NVPTXISD::Suld1DI8Clamp:
3576 Opc = NVPTX::SULD_1D_I8_CLAMP;
3577 Ops.push_back(TexHandle);
3578 Ops.push_back(N->getOperand(2));
3579 Ops.push_back(Chain);
3581 case NVPTXISD::Suld1DI16Clamp:
3582 Opc = NVPTX::SULD_1D_I16_CLAMP;
3583 Ops.push_back(TexHandle);
3584 Ops.push_back(N->getOperand(2));
3585 Ops.push_back(Chain);
3587 case NVPTXISD::Suld1DI32Clamp:
3588 Opc = NVPTX::SULD_1D_I32_CLAMP;
3589 Ops.push_back(TexHandle);
3590 Ops.push_back(N->getOperand(2));
3591 Ops.push_back(Chain);
3593 case NVPTXISD::Suld1DI64Clamp:
3594 Opc = NVPTX::SULD_1D_I64_CLAMP;
3595 Ops.push_back(TexHandle);
3596 Ops.push_back(N->getOperand(2));
3597 Ops.push_back(Chain);
3599 case NVPTXISD::Suld1DV2I8Clamp:
3600 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3601 Ops.push_back(TexHandle);
3602 Ops.push_back(N->getOperand(2));
3603 Ops.push_back(Chain);
3605 case NVPTXISD::Suld1DV2I16Clamp:
3606 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3607 Ops.push_back(TexHandle);
3608 Ops.push_back(N->getOperand(2));
3609 Ops.push_back(Chain);
3611 case NVPTXISD::Suld1DV2I32Clamp:
3612 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3613 Ops.push_back(TexHandle);
3614 Ops.push_back(N->getOperand(2));
3615 Ops.push_back(Chain);
3617 case NVPTXISD::Suld1DV2I64Clamp:
3618 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3619 Ops.push_back(TexHandle);
3620 Ops.push_back(N->getOperand(2));
3621 Ops.push_back(Chain);
3623 case NVPTXISD::Suld1DV4I8Clamp:
3624 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3625 Ops.push_back(TexHandle);
3626 Ops.push_back(N->getOperand(2));
3627 Ops.push_back(Chain);
3629 case NVPTXISD::Suld1DV4I16Clamp:
3630 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3631 Ops.push_back(TexHandle);
3632 Ops.push_back(N->getOperand(2));
3633 Ops.push_back(Chain);
3635 case NVPTXISD::Suld1DV4I32Clamp:
3636 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3637 Ops.push_back(TexHandle);
3638 Ops.push_back(N->getOperand(2));
3639 Ops.push_back(Chain);
3641 case NVPTXISD::Suld1DArrayI8Clamp:
3642 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3643 Ops.push_back(TexHandle);
3644 Ops.push_back(N->getOperand(2));
3645 Ops.push_back(N->getOperand(3));
3646 Ops.push_back(Chain);
3648 case NVPTXISD::Suld1DArrayI16Clamp:
3649 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3650 Ops.push_back(TexHandle);
3651 Ops.push_back(N->getOperand(2));
3652 Ops.push_back(N->getOperand(3));
3653 Ops.push_back(Chain);
3655 case NVPTXISD::Suld1DArrayI32Clamp:
3656 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3657 Ops.push_back(TexHandle);
3658 Ops.push_back(N->getOperand(2));
3659 Ops.push_back(N->getOperand(3));
3660 Ops.push_back(Chain);
3662 case NVPTXISD::Suld1DArrayI64Clamp:
3663 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3664 Ops.push_back(TexHandle);
3665 Ops.push_back(N->getOperand(2));
3666 Ops.push_back(N->getOperand(3));
3667 Ops.push_back(Chain);
3669 case NVPTXISD::Suld1DArrayV2I8Clamp:
3670 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3671 Ops.push_back(TexHandle);
3672 Ops.push_back(N->getOperand(2));
3673 Ops.push_back(N->getOperand(3));
3674 Ops.push_back(Chain);
3676 case NVPTXISD::Suld1DArrayV2I16Clamp:
3677 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3678 Ops.push_back(TexHandle);
3679 Ops.push_back(N->getOperand(2));
3680 Ops.push_back(N->getOperand(3));
3681 Ops.push_back(Chain);
3683 case NVPTXISD::Suld1DArrayV2I32Clamp:
3684 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3685 Ops.push_back(TexHandle);
3686 Ops.push_back(N->getOperand(2));
3687 Ops.push_back(N->getOperand(3));
3688 Ops.push_back(Chain);
3690 case NVPTXISD::Suld1DArrayV2I64Clamp:
3691 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3692 Ops.push_back(TexHandle);
3693 Ops.push_back(N->getOperand(2));
3694 Ops.push_back(N->getOperand(3));
3695 Ops.push_back(Chain);
3697 case NVPTXISD::Suld1DArrayV4I8Clamp:
3698 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3699 Ops.push_back(TexHandle);
3700 Ops.push_back(N->getOperand(2));
3701 Ops.push_back(N->getOperand(3));
3702 Ops.push_back(Chain);
3704 case NVPTXISD::Suld1DArrayV4I16Clamp:
3705 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3706 Ops.push_back(TexHandle);
3707 Ops.push_back(N->getOperand(2));
3708 Ops.push_back(N->getOperand(3));
3709 Ops.push_back(Chain);
3711 case NVPTXISD::Suld1DArrayV4I32Clamp:
3712 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3713 Ops.push_back(TexHandle);
3714 Ops.push_back(N->getOperand(2));
3715 Ops.push_back(N->getOperand(3));
3716 Ops.push_back(Chain);
3718 case NVPTXISD::Suld2DI8Clamp:
3719 Opc = NVPTX::SULD_2D_I8_CLAMP;
3720 Ops.push_back(TexHandle);
3721 Ops.push_back(N->getOperand(2));
3722 Ops.push_back(N->getOperand(3));
3723 Ops.push_back(Chain);
3725 case NVPTXISD::Suld2DI16Clamp:
3726 Opc = NVPTX::SULD_2D_I16_CLAMP;
3727 Ops.push_back(TexHandle);
3728 Ops.push_back(N->getOperand(2));
3729 Ops.push_back(N->getOperand(3));
3730 Ops.push_back(Chain);
3732 case NVPTXISD::Suld2DI32Clamp:
3733 Opc = NVPTX::SULD_2D_I32_CLAMP;
3734 Ops.push_back(TexHandle);
3735 Ops.push_back(N->getOperand(2));
3736 Ops.push_back(N->getOperand(3));
3737 Ops.push_back(Chain);
3739 case NVPTXISD::Suld2DI64Clamp:
3740 Opc = NVPTX::SULD_2D_I64_CLAMP;
3741 Ops.push_back(TexHandle);
3742 Ops.push_back(N->getOperand(2));
3743 Ops.push_back(N->getOperand(3));
3744 Ops.push_back(Chain);
3746 case NVPTXISD::Suld2DV2I8Clamp:
3747 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3748 Ops.push_back(TexHandle);
3749 Ops.push_back(N->getOperand(2));
3750 Ops.push_back(N->getOperand(3));
3751 Ops.push_back(Chain);
3753 case NVPTXISD::Suld2DV2I16Clamp:
3754 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3755 Ops.push_back(TexHandle);
3756 Ops.push_back(N->getOperand(2));
3757 Ops.push_back(N->getOperand(3));
3758 Ops.push_back(Chain);
3760 case NVPTXISD::Suld2DV2I32Clamp:
3761 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3762 Ops.push_back(TexHandle);
3763 Ops.push_back(N->getOperand(2));
3764 Ops.push_back(N->getOperand(3));
3765 Ops.push_back(Chain);
3767 case NVPTXISD::Suld2DV2I64Clamp:
3768 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3769 Ops.push_back(TexHandle);
3770 Ops.push_back(N->getOperand(2));
3771 Ops.push_back(N->getOperand(3));
3772 Ops.push_back(Chain);
3774 case NVPTXISD::Suld2DV4I8Clamp:
3775 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3776 Ops.push_back(TexHandle);
3777 Ops.push_back(N->getOperand(2));
3778 Ops.push_back(N->getOperand(3));
3779 Ops.push_back(Chain);
3781 case NVPTXISD::Suld2DV4I16Clamp:
3782 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3783 Ops.push_back(TexHandle);
3784 Ops.push_back(N->getOperand(2));
3785 Ops.push_back(N->getOperand(3));
3786 Ops.push_back(Chain);
3788 case NVPTXISD::Suld2DV4I32Clamp:
3789 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3790 Ops.push_back(TexHandle);
3791 Ops.push_back(N->getOperand(2));
3792 Ops.push_back(N->getOperand(3));
3793 Ops.push_back(Chain);
3795 case NVPTXISD::Suld2DArrayI8Clamp:
3796 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3797 Ops.push_back(TexHandle);
3798 Ops.push_back(N->getOperand(2));
3799 Ops.push_back(N->getOperand(3));
3800 Ops.push_back(N->getOperand(4));
3801 Ops.push_back(Chain);
3803 case NVPTXISD::Suld2DArrayI16Clamp:
3804 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3805 Ops.push_back(TexHandle);
3806 Ops.push_back(N->getOperand(2));
3807 Ops.push_back(N->getOperand(3));
3808 Ops.push_back(N->getOperand(4));
3809 Ops.push_back(Chain);
3811 case NVPTXISD::Suld2DArrayI32Clamp:
3812 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3813 Ops.push_back(TexHandle);
3814 Ops.push_back(N->getOperand(2));
3815 Ops.push_back(N->getOperand(3));
3816 Ops.push_back(N->getOperand(4));
3817 Ops.push_back(Chain);
3819 case NVPTXISD::Suld2DArrayI64Clamp:
3820 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3821 Ops.push_back(TexHandle);
3822 Ops.push_back(N->getOperand(2));
3823 Ops.push_back(N->getOperand(3));
3824 Ops.push_back(N->getOperand(4));
3825 Ops.push_back(Chain);
3827 case NVPTXISD::Suld2DArrayV2I8Clamp:
3828 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3829 Ops.push_back(TexHandle);
3830 Ops.push_back(N->getOperand(2));
3831 Ops.push_back(N->getOperand(3));
3832 Ops.push_back(N->getOperand(4));
3833 Ops.push_back(Chain);
3835 case NVPTXISD::Suld2DArrayV2I16Clamp:
3836 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3837 Ops.push_back(TexHandle);
3838 Ops.push_back(N->getOperand(2));
3839 Ops.push_back(N->getOperand(3));
3840 Ops.push_back(N->getOperand(4));
3841 Ops.push_back(Chain);
3843 case NVPTXISD::Suld2DArrayV2I32Clamp:
3844 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3845 Ops.push_back(TexHandle);
3846 Ops.push_back(N->getOperand(2));
3847 Ops.push_back(N->getOperand(3));
3848 Ops.push_back(N->getOperand(4));
3849 Ops.push_back(Chain);
3851 case NVPTXISD::Suld2DArrayV2I64Clamp:
3852 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3853 Ops.push_back(TexHandle);
3854 Ops.push_back(N->getOperand(2));
3855 Ops.push_back(N->getOperand(3));
3856 Ops.push_back(N->getOperand(4));
3857 Ops.push_back(Chain);
3859 case NVPTXISD::Suld2DArrayV4I8Clamp:
3860 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3861 Ops.push_back(TexHandle);
3862 Ops.push_back(N->getOperand(2));
3863 Ops.push_back(N->getOperand(3));
3864 Ops.push_back(N->getOperand(4));
3865 Ops.push_back(Chain);
3867 case NVPTXISD::Suld2DArrayV4I16Clamp:
3868 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3869 Ops.push_back(TexHandle);
3870 Ops.push_back(N->getOperand(2));
3871 Ops.push_back(N->getOperand(3));
3872 Ops.push_back(N->getOperand(4));
3873 Ops.push_back(Chain);
3875 case NVPTXISD::Suld2DArrayV4I32Clamp:
3876 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3877 Ops.push_back(TexHandle);
3878 Ops.push_back(N->getOperand(2));
3879 Ops.push_back(N->getOperand(3));
3880 Ops.push_back(N->getOperand(4));
3881 Ops.push_back(Chain);
3883 case NVPTXISD::Suld3DI8Clamp:
3884 Opc = NVPTX::SULD_3D_I8_CLAMP;
3885 Ops.push_back(TexHandle);
3886 Ops.push_back(N->getOperand(2));
3887 Ops.push_back(N->getOperand(3));
3888 Ops.push_back(N->getOperand(4));
3889 Ops.push_back(Chain);
3891 case NVPTXISD::Suld3DI16Clamp:
3892 Opc = NVPTX::SULD_3D_I16_CLAMP;
3893 Ops.push_back(TexHandle);
3894 Ops.push_back(N->getOperand(2));
3895 Ops.push_back(N->getOperand(3));
3896 Ops.push_back(N->getOperand(4));
3897 Ops.push_back(Chain);
3899 case NVPTXISD::Suld3DI32Clamp:
3900 Opc = NVPTX::SULD_3D_I32_CLAMP;
3901 Ops.push_back(TexHandle);
3902 Ops.push_back(N->getOperand(2));
3903 Ops.push_back(N->getOperand(3));
3904 Ops.push_back(N->getOperand(4));
3905 Ops.push_back(Chain);
3907 case NVPTXISD::Suld3DI64Clamp:
3908 Opc = NVPTX::SULD_3D_I64_CLAMP;
3909 Ops.push_back(TexHandle);
3910 Ops.push_back(N->getOperand(2));
3911 Ops.push_back(N->getOperand(3));
3912 Ops.push_back(N->getOperand(4));
3913 Ops.push_back(Chain);
3915 case NVPTXISD::Suld3DV2I8Clamp:
3916 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3917 Ops.push_back(TexHandle);
3918 Ops.push_back(N->getOperand(2));
3919 Ops.push_back(N->getOperand(3));
3920 Ops.push_back(N->getOperand(4));
3921 Ops.push_back(Chain);
3923 case NVPTXISD::Suld3DV2I16Clamp:
3924 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3925 Ops.push_back(TexHandle);
3926 Ops.push_back(N->getOperand(2));
3927 Ops.push_back(N->getOperand(3));
3928 Ops.push_back(N->getOperand(4));
3929 Ops.push_back(Chain);
3931 case NVPTXISD::Suld3DV2I32Clamp:
3932 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3933 Ops.push_back(TexHandle);
3934 Ops.push_back(N->getOperand(2));
3935 Ops.push_back(N->getOperand(3));
3936 Ops.push_back(N->getOperand(4));
3937 Ops.push_back(Chain);
3939 case NVPTXISD::Suld3DV2I64Clamp:
3940 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3941 Ops.push_back(TexHandle);
3942 Ops.push_back(N->getOperand(2));
3943 Ops.push_back(N->getOperand(3));
3944 Ops.push_back(N->getOperand(4));
3945 Ops.push_back(Chain);
3947 case NVPTXISD::Suld3DV4I8Clamp:
3948 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3949 Ops.push_back(TexHandle);
3950 Ops.push_back(N->getOperand(2));
3951 Ops.push_back(N->getOperand(3));
3952 Ops.push_back(N->getOperand(4));
3953 Ops.push_back(Chain);
3955 case NVPTXISD::Suld3DV4I16Clamp:
3956 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3957 Ops.push_back(TexHandle);
3958 Ops.push_back(N->getOperand(2));
3959 Ops.push_back(N->getOperand(3));
3960 Ops.push_back(N->getOperand(4));
3961 Ops.push_back(Chain);
3963 case NVPTXISD::Suld3DV4I32Clamp:
3964 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3965 Ops.push_back(TexHandle);
3966 Ops.push_back(N->getOperand(2));
3967 Ops.push_back(N->getOperand(3));
3968 Ops.push_back(N->getOperand(4));
3969 Ops.push_back(Chain);
3971 case NVPTXISD::Suld1DI8Trap:
3972 Opc = NVPTX::SULD_1D_I8_TRAP;
3973 Ops.push_back(TexHandle);
3974 Ops.push_back(N->getOperand(2));
3975 Ops.push_back(Chain);
3977 case NVPTXISD::Suld1DI16Trap:
3978 Opc = NVPTX::SULD_1D_I16_TRAP;
3979 Ops.push_back(TexHandle);
3980 Ops.push_back(N->getOperand(2));
3981 Ops.push_back(Chain);
3983 case NVPTXISD::Suld1DI32Trap:
3984 Opc = NVPTX::SULD_1D_I32_TRAP;
3985 Ops.push_back(TexHandle);
3986 Ops.push_back(N->getOperand(2));
3987 Ops.push_back(Chain);
3989 case NVPTXISD::Suld1DI64Trap:
3990 Opc = NVPTX::SULD_1D_I64_TRAP;
3991 Ops.push_back(TexHandle);
3992 Ops.push_back(N->getOperand(2));
3993 Ops.push_back(Chain);
3995 case NVPTXISD::Suld1DV2I8Trap:
3996 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3997 Ops.push_back(TexHandle);
3998 Ops.push_back(N->getOperand(2));
3999 Ops.push_back(Chain);
4001 case NVPTXISD::Suld1DV2I16Trap:
4002 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4003 Ops.push_back(TexHandle);
4004 Ops.push_back(N->getOperand(2));
4005 Ops.push_back(Chain);
4007 case NVPTXISD::Suld1DV2I32Trap:
4008 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4009 Ops.push_back(TexHandle);
4010 Ops.push_back(N->getOperand(2));
4011 Ops.push_back(Chain);
4013 case NVPTXISD::Suld1DV2I64Trap:
4014 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4015 Ops.push_back(TexHandle);
4016 Ops.push_back(N->getOperand(2));
4017 Ops.push_back(Chain);
4019 case NVPTXISD::Suld1DV4I8Trap:
4020 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4021 Ops.push_back(TexHandle);
4022 Ops.push_back(N->getOperand(2));
4023 Ops.push_back(Chain);
4025 case NVPTXISD::Suld1DV4I16Trap:
4026 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4027 Ops.push_back(TexHandle);
4028 Ops.push_back(N->getOperand(2));
4029 Ops.push_back(Chain);
4031 case NVPTXISD::Suld1DV4I32Trap:
4032 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4033 Ops.push_back(TexHandle);
4034 Ops.push_back(N->getOperand(2));
4035 Ops.push_back(Chain);
4037 case NVPTXISD::Suld1DArrayI8Trap:
4038 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4039 Ops.push_back(TexHandle);
4040 Ops.push_back(N->getOperand(2));
4041 Ops.push_back(N->getOperand(3));
4042 Ops.push_back(Chain);
4044 case NVPTXISD::Suld1DArrayI16Trap:
4045 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4046 Ops.push_back(TexHandle);
4047 Ops.push_back(N->getOperand(2));
4048 Ops.push_back(N->getOperand(3));
4049 Ops.push_back(Chain);
4051 case NVPTXISD::Suld1DArrayI32Trap:
4052 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(N->getOperand(3));
4056 Ops.push_back(Chain);
4058 case NVPTXISD::Suld1DArrayI64Trap:
4059 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4060 Ops.push_back(TexHandle);
4061 Ops.push_back(N->getOperand(2));
4062 Ops.push_back(N->getOperand(3));
4063 Ops.push_back(Chain);
4065 case NVPTXISD::Suld1DArrayV2I8Trap:
4066 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4067 Ops.push_back(TexHandle);
4068 Ops.push_back(N->getOperand(2));
4069 Ops.push_back(N->getOperand(3));
4070 Ops.push_back(Chain);
4072 case NVPTXISD::Suld1DArrayV2I16Trap:
4073 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4074 Ops.push_back(TexHandle);
4075 Ops.push_back(N->getOperand(2));
4076 Ops.push_back(N->getOperand(3));
4077 Ops.push_back(Chain);
4079 case NVPTXISD::Suld1DArrayV2I32Trap:
4080 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4081 Ops.push_back(TexHandle);
4082 Ops.push_back(N->getOperand(2));
4083 Ops.push_back(N->getOperand(3));
4084 Ops.push_back(Chain);
4086 case NVPTXISD::Suld1DArrayV2I64Trap:
4087 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4088 Ops.push_back(TexHandle);
4089 Ops.push_back(N->getOperand(2));
4090 Ops.push_back(N->getOperand(3));
4091 Ops.push_back(Chain);
4093 case NVPTXISD::Suld1DArrayV4I8Trap:
4094 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4095 Ops.push_back(TexHandle);
4096 Ops.push_back(N->getOperand(2));
4097 Ops.push_back(N->getOperand(3));
4098 Ops.push_back(Chain);
4100 case NVPTXISD::Suld1DArrayV4I16Trap:
4101 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4102 Ops.push_back(TexHandle);
4103 Ops.push_back(N->getOperand(2));
4104 Ops.push_back(N->getOperand(3));
4105 Ops.push_back(Chain);
4107 case NVPTXISD::Suld1DArrayV4I32Trap:
4108 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4109 Ops.push_back(TexHandle);
4110 Ops.push_back(N->getOperand(2));
4111 Ops.push_back(N->getOperand(3));
4112 Ops.push_back(Chain);
4114 case NVPTXISD::Suld2DI8Trap:
4115 Opc = NVPTX::SULD_2D_I8_TRAP;
4116 Ops.push_back(TexHandle);
4117 Ops.push_back(N->getOperand(2));
4118 Ops.push_back(N->getOperand(3));
4119 Ops.push_back(Chain);
4121 case NVPTXISD::Suld2DI16Trap:
4122 Opc = NVPTX::SULD_2D_I16_TRAP;
4123 Ops.push_back(TexHandle);
4124 Ops.push_back(N->getOperand(2));
4125 Ops.push_back(N->getOperand(3));
4126 Ops.push_back(Chain);
4128 case NVPTXISD::Suld2DI32Trap:
4129 Opc = NVPTX::SULD_2D_I32_TRAP;
4130 Ops.push_back(TexHandle);
4131 Ops.push_back(N->getOperand(2));
4132 Ops.push_back(N->getOperand(3));
4133 Ops.push_back(Chain);
4135 case NVPTXISD::Suld2DI64Trap:
4136 Opc = NVPTX::SULD_2D_I64_TRAP;
4137 Ops.push_back(TexHandle);
4138 Ops.push_back(N->getOperand(2));
4139 Ops.push_back(N->getOperand(3));
4140 Ops.push_back(Chain);
4142 case NVPTXISD::Suld2DV2I8Trap:
4143 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4144 Ops.push_back(TexHandle);
4145 Ops.push_back(N->getOperand(2));
4146 Ops.push_back(N->getOperand(3));
4147 Ops.push_back(Chain);
4149 case NVPTXISD::Suld2DV2I16Trap:
4150 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4151 Ops.push_back(TexHandle);
4152 Ops.push_back(N->getOperand(2));
4153 Ops.push_back(N->getOperand(3));
4154 Ops.push_back(Chain);
4156 case NVPTXISD::Suld2DV2I32Trap:
4157 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4158 Ops.push_back(TexHandle);
4159 Ops.push_back(N->getOperand(2));
4160 Ops.push_back(N->getOperand(3));
4161 Ops.push_back(Chain);
4163 case NVPTXISD::Suld2DV2I64Trap:
4164 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4165 Ops.push_back(TexHandle);
4166 Ops.push_back(N->getOperand(2));
4167 Ops.push_back(N->getOperand(3));
4168 Ops.push_back(Chain);
4170 case NVPTXISD::Suld2DV4I8Trap:
4171 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4172 Ops.push_back(TexHandle);
4173 Ops.push_back(N->getOperand(2));
4174 Ops.push_back(N->getOperand(3));
4175 Ops.push_back(Chain);
4177 case NVPTXISD::Suld2DV4I16Trap:
4178 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4179 Ops.push_back(TexHandle);
4180 Ops.push_back(N->getOperand(2));
4181 Ops.push_back(N->getOperand(3));
4182 Ops.push_back(Chain);
4184 case NVPTXISD::Suld2DV4I32Trap:
4185 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4186 Ops.push_back(TexHandle);
4187 Ops.push_back(N->getOperand(2));
4188 Ops.push_back(N->getOperand(3));
4189 Ops.push_back(Chain);
4191 case NVPTXISD::Suld2DArrayI8Trap:
4192 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4193 Ops.push_back(TexHandle);
4194 Ops.push_back(N->getOperand(2));
4195 Ops.push_back(N->getOperand(3));
4196 Ops.push_back(N->getOperand(4));
4197 Ops.push_back(Chain);
4199 case NVPTXISD::Suld2DArrayI16Trap:
4200 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4201 Ops.push_back(TexHandle);
4202 Ops.push_back(N->getOperand(2));
4203 Ops.push_back(N->getOperand(3));
4204 Ops.push_back(N->getOperand(4));
4205 Ops.push_back(Chain);
4207 case NVPTXISD::Suld2DArrayI32Trap:
4208 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4209 Ops.push_back(TexHandle);
4210 Ops.push_back(N->getOperand(2));
4211 Ops.push_back(N->getOperand(3));
4212 Ops.push_back(N->getOperand(4));
4213 Ops.push_back(Chain);
4215 case NVPTXISD::Suld2DArrayI64Trap:
4216 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4217 Ops.push_back(TexHandle);
4218 Ops.push_back(N->getOperand(2));
4219 Ops.push_back(N->getOperand(3));
4220 Ops.push_back(N->getOperand(4));
4221 Ops.push_back(Chain);
4223 case NVPTXISD::Suld2DArrayV2I8Trap:
4224 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4225 Ops.push_back(TexHandle);
4226 Ops.push_back(N->getOperand(2));
4227 Ops.push_back(N->getOperand(3));
4228 Ops.push_back(N->getOperand(4));
4229 Ops.push_back(Chain);
4231 case NVPTXISD::Suld2DArrayV2I16Trap:
4232 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4233 Ops.push_back(TexHandle);
4234 Ops.push_back(N->getOperand(2));
4235 Ops.push_back(N->getOperand(3));
4236 Ops.push_back(N->getOperand(4));
4237 Ops.push_back(Chain);
4239 case NVPTXISD::Suld2DArrayV2I32Trap:
4240 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4241 Ops.push_back(TexHandle);
4242 Ops.push_back(N->getOperand(2));
4243 Ops.push_back(N->getOperand(3));
4244 Ops.push_back(N->getOperand(4));
4245 Ops.push_back(Chain);
4247 case NVPTXISD::Suld2DArrayV2I64Trap:
4248 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4249 Ops.push_back(TexHandle);
4250 Ops.push_back(N->getOperand(2));
4251 Ops.push_back(N->getOperand(3));
4252 Ops.push_back(N->getOperand(4));
4253 Ops.push_back(Chain);
4255 case NVPTXISD::Suld2DArrayV4I8Trap:
4256 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4257 Ops.push_back(TexHandle);
4258 Ops.push_back(N->getOperand(2));
4259 Ops.push_back(N->getOperand(3));
4260 Ops.push_back(N->getOperand(4));
4261 Ops.push_back(Chain);
4263 case NVPTXISD::Suld2DArrayV4I16Trap:
4264 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4265 Ops.push_back(TexHandle);
4266 Ops.push_back(N->getOperand(2));
4267 Ops.push_back(N->getOperand(3));
4268 Ops.push_back(N->getOperand(4));
4269 Ops.push_back(Chain);
4271 case NVPTXISD::Suld2DArrayV4I32Trap:
4272 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4273 Ops.push_back(TexHandle);
4274 Ops.push_back(N->getOperand(2));
4275 Ops.push_back(N->getOperand(3));
4276 Ops.push_back(N->getOperand(4));
4277 Ops.push_back(Chain);
4279 case NVPTXISD::Suld3DI8Trap:
4280 Opc = NVPTX::SULD_3D_I8_TRAP;
4281 Ops.push_back(TexHandle);
4282 Ops.push_back(N->getOperand(2));
4283 Ops.push_back(N->getOperand(3));
4284 Ops.push_back(N->getOperand(4));
4285 Ops.push_back(Chain);
4287 case NVPTXISD::Suld3DI16Trap:
4288 Opc = NVPTX::SULD_3D_I16_TRAP;
4289 Ops.push_back(TexHandle);
4290 Ops.push_back(N->getOperand(2));
4291 Ops.push_back(N->getOperand(3));
4292 Ops.push_back(N->getOperand(4));
4293 Ops.push_back(Chain);
4295 case NVPTXISD::Suld3DI32Trap:
4296 Opc = NVPTX::SULD_3D_I32_TRAP;
4297 Ops.push_back(TexHandle);
4298 Ops.push_back(N->getOperand(2));
4299 Ops.push_back(N->getOperand(3));
4300 Ops.push_back(N->getOperand(4));
4301 Ops.push_back(Chain);
4303 case NVPTXISD::Suld3DI64Trap:
4304 Opc = NVPTX::SULD_3D_I64_TRAP;
4305 Ops.push_back(TexHandle);
4306 Ops.push_back(N->getOperand(2));
4307 Ops.push_back(N->getOperand(3));
4308 Ops.push_back(N->getOperand(4));
4309 Ops.push_back(Chain);
4311 case NVPTXISD::Suld3DV2I8Trap:
4312 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4313 Ops.push_back(TexHandle);
4314 Ops.push_back(N->getOperand(2));
4315 Ops.push_back(N->getOperand(3));
4316 Ops.push_back(N->getOperand(4));
4317 Ops.push_back(Chain);
4319 case NVPTXISD::Suld3DV2I16Trap:
4320 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4321 Ops.push_back(TexHandle);
4322 Ops.push_back(N->getOperand(2));
4323 Ops.push_back(N->getOperand(3));
4324 Ops.push_back(N->getOperand(4));
4325 Ops.push_back(Chain);
4327 case NVPTXISD::Suld3DV2I32Trap:
4328 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4329 Ops.push_back(TexHandle);
4330 Ops.push_back(N->getOperand(2));
4331 Ops.push_back(N->getOperand(3));
4332 Ops.push_back(N->getOperand(4));
4333 Ops.push_back(Chain);
4335 case NVPTXISD::Suld3DV2I64Trap:
4336 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4337 Ops.push_back(TexHandle);
4338 Ops.push_back(N->getOperand(2));
4339 Ops.push_back(N->getOperand(3));
4340 Ops.push_back(N->getOperand(4));
4341 Ops.push_back(Chain);
4343 case NVPTXISD::Suld3DV4I8Trap:
4344 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4345 Ops.push_back(TexHandle);
4346 Ops.push_back(N->getOperand(2));
4347 Ops.push_back(N->getOperand(3));
4348 Ops.push_back(N->getOperand(4));
4349 Ops.push_back(Chain);
4351 case NVPTXISD::Suld3DV4I16Trap:
4352 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4353 Ops.push_back(TexHandle);
4354 Ops.push_back(N->getOperand(2));
4355 Ops.push_back(N->getOperand(3));
4356 Ops.push_back(N->getOperand(4));
4357 Ops.push_back(Chain);
4359 case NVPTXISD::Suld3DV4I32Trap:
4360 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4361 Ops.push_back(TexHandle);
4362 Ops.push_back(N->getOperand(2));
4363 Ops.push_back(N->getOperand(3));
4364 Ops.push_back(N->getOperand(4));
4365 Ops.push_back(Chain);
4367 case NVPTXISD::Suld1DI8Zero:
4368 Opc = NVPTX::SULD_1D_I8_ZERO;
4369 Ops.push_back(TexHandle);
4370 Ops.push_back(N->getOperand(2));
4371 Ops.push_back(Chain);
4373 case NVPTXISD::Suld1DI16Zero:
4374 Opc = NVPTX::SULD_1D_I16_ZERO;
4375 Ops.push_back(TexHandle);
4376 Ops.push_back(N->getOperand(2));
4377 Ops.push_back(Chain);
4379 case NVPTXISD::Suld1DI32Zero:
4380 Opc = NVPTX::SULD_1D_I32_ZERO;
4381 Ops.push_back(TexHandle);
4382 Ops.push_back(N->getOperand(2));
4383 Ops.push_back(Chain);
4385 case NVPTXISD::Suld1DI64Zero:
4386 Opc = NVPTX::SULD_1D_I64_ZERO;
4387 Ops.push_back(TexHandle);
4388 Ops.push_back(N->getOperand(2));
4389 Ops.push_back(Chain);
4391 case NVPTXISD::Suld1DV2I8Zero:
4392 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4393 Ops.push_back(TexHandle);
4394 Ops.push_back(N->getOperand(2));
4395 Ops.push_back(Chain);
4397 case NVPTXISD::Suld1DV2I16Zero:
4398 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4399 Ops.push_back(TexHandle);
4400 Ops.push_back(N->getOperand(2));
4401 Ops.push_back(Chain);
4403 case NVPTXISD::Suld1DV2I32Zero:
4404 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4405 Ops.push_back(TexHandle);
4406 Ops.push_back(N->getOperand(2));
4407 Ops.push_back(Chain);
4409 case NVPTXISD::Suld1DV2I64Zero:
4410 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4411 Ops.push_back(TexHandle);
4412 Ops.push_back(N->getOperand(2));
4413 Ops.push_back(Chain);
4415 case NVPTXISD::Suld1DV4I8Zero:
4416 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4417 Ops.push_back(TexHandle);
4418 Ops.push_back(N->getOperand(2));
4419 Ops.push_back(Chain);
4421 case NVPTXISD::Suld1DV4I16Zero:
4422 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4423 Ops.push_back(TexHandle);
4424 Ops.push_back(N->getOperand(2));
4425 Ops.push_back(Chain);
4427 case NVPTXISD::Suld1DV4I32Zero:
4428 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4429 Ops.push_back(TexHandle);
4430 Ops.push_back(N->getOperand(2));
4431 Ops.push_back(Chain);
4433 case NVPTXISD::Suld1DArrayI8Zero:
4434 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4435 Ops.push_back(TexHandle);
4436 Ops.push_back(N->getOperand(2));
4437 Ops.push_back(N->getOperand(3));
4438 Ops.push_back(Chain);
4440 case NVPTXISD::Suld1DArrayI16Zero:
4441 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4442 Ops.push_back(TexHandle);
4443 Ops.push_back(N->getOperand(2));
4444 Ops.push_back(N->getOperand(3));
4445 Ops.push_back(Chain);
4447 case NVPTXISD::Suld1DArrayI32Zero:
4448 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(N->getOperand(3));
4452 Ops.push_back(Chain);
4454 case NVPTXISD::Suld1DArrayI64Zero:
4455 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4456 Ops.push_back(TexHandle);
4457 Ops.push_back(N->getOperand(2));
4458 Ops.push_back(N->getOperand(3));
4459 Ops.push_back(Chain);
4461 case NVPTXISD::Suld1DArrayV2I8Zero:
4462 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4463 Ops.push_back(TexHandle);
4464 Ops.push_back(N->getOperand(2));
4465 Ops.push_back(N->getOperand(3));
4466 Ops.push_back(Chain);
4468 case NVPTXISD::Suld1DArrayV2I16Zero:
4469 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4470 Ops.push_back(TexHandle);
4471 Ops.push_back(N->getOperand(2));
4472 Ops.push_back(N->getOperand(3));
4473 Ops.push_back(Chain);
4475 case NVPTXISD::Suld1DArrayV2I32Zero:
4476 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4477 Ops.push_back(TexHandle);
4478 Ops.push_back(N->getOperand(2));
4479 Ops.push_back(N->getOperand(3));
4480 Ops.push_back(Chain);
4482 case NVPTXISD::Suld1DArrayV2I64Zero:
4483 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4484 Ops.push_back(TexHandle);
4485 Ops.push_back(N->getOperand(2));
4486 Ops.push_back(N->getOperand(3));
4487 Ops.push_back(Chain);
4489 case NVPTXISD::Suld1DArrayV4I8Zero:
4490 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4491 Ops.push_back(TexHandle);
4492 Ops.push_back(N->getOperand(2));
4493 Ops.push_back(N->getOperand(3));
4494 Ops.push_back(Chain);
4496 case NVPTXISD::Suld1DArrayV4I16Zero:
4497 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4498 Ops.push_back(TexHandle);
4499 Ops.push_back(N->getOperand(2));
4500 Ops.push_back(N->getOperand(3));
4501 Ops.push_back(Chain);
4503 case NVPTXISD::Suld1DArrayV4I32Zero:
4504 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4505 Ops.push_back(TexHandle);
4506 Ops.push_back(N->getOperand(2));
4507 Ops.push_back(N->getOperand(3));
4508 Ops.push_back(Chain);
4510 case NVPTXISD::Suld2DI8Zero:
4511 Opc = NVPTX::SULD_2D_I8_ZERO;
4512 Ops.push_back(TexHandle);
4513 Ops.push_back(N->getOperand(2));
4514 Ops.push_back(N->getOperand(3));
4515 Ops.push_back(Chain);
4517 case NVPTXISD::Suld2DI16Zero:
4518 Opc = NVPTX::SULD_2D_I16_ZERO;
4519 Ops.push_back(TexHandle);
4520 Ops.push_back(N->getOperand(2));
4521 Ops.push_back(N->getOperand(3));
4522 Ops.push_back(Chain);
4524 case NVPTXISD::Suld2DI32Zero:
4525 Opc = NVPTX::SULD_2D_I32_ZERO;
4526 Ops.push_back(TexHandle);
4527 Ops.push_back(N->getOperand(2));
4528 Ops.push_back(N->getOperand(3));
4529 Ops.push_back(Chain);
4531 case NVPTXISD::Suld2DI64Zero:
4532 Opc = NVPTX::SULD_2D_I64_ZERO;
4533 Ops.push_back(TexHandle);
4534 Ops.push_back(N->getOperand(2));
4535 Ops.push_back(N->getOperand(3));
4536 Ops.push_back(Chain);
4538 case NVPTXISD::Suld2DV2I8Zero:
4539 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4540 Ops.push_back(TexHandle);
4541 Ops.push_back(N->getOperand(2));
4542 Ops.push_back(N->getOperand(3));
4543 Ops.push_back(Chain);
4545 case NVPTXISD::Suld2DV2I16Zero:
4546 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4547 Ops.push_back(TexHandle);
4548 Ops.push_back(N->getOperand(2));
4549 Ops.push_back(N->getOperand(3));
4550 Ops.push_back(Chain);
4552 case NVPTXISD::Suld2DV2I32Zero:
4553 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4554 Ops.push_back(TexHandle);
4555 Ops.push_back(N->getOperand(2));
4556 Ops.push_back(N->getOperand(3));
4557 Ops.push_back(Chain);
4559 case NVPTXISD::Suld2DV2I64Zero:
4560 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4561 Ops.push_back(TexHandle);
4562 Ops.push_back(N->getOperand(2));
4563 Ops.push_back(N->getOperand(3));
4564 Ops.push_back(Chain);
4566 case NVPTXISD::Suld2DV4I8Zero:
4567 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4568 Ops.push_back(TexHandle);
4569 Ops.push_back(N->getOperand(2));
4570 Ops.push_back(N->getOperand(3));
4571 Ops.push_back(Chain);
4573 case NVPTXISD::Suld2DV4I16Zero:
4574 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4575 Ops.push_back(TexHandle);
4576 Ops.push_back(N->getOperand(2));
4577 Ops.push_back(N->getOperand(3));
4578 Ops.push_back(Chain);
4580 case NVPTXISD::Suld2DV4I32Zero:
4581 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4582 Ops.push_back(TexHandle);
4583 Ops.push_back(N->getOperand(2));
4584 Ops.push_back(N->getOperand(3));
4585 Ops.push_back(Chain);
4587 case NVPTXISD::Suld2DArrayI8Zero:
4588 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4589 Ops.push_back(TexHandle);
4590 Ops.push_back(N->getOperand(2));
4591 Ops.push_back(N->getOperand(3));
4592 Ops.push_back(N->getOperand(4));
4593 Ops.push_back(Chain);
4595 case NVPTXISD::Suld2DArrayI16Zero:
4596 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4597 Ops.push_back(TexHandle);
4598 Ops.push_back(N->getOperand(2));
4599 Ops.push_back(N->getOperand(3));
4600 Ops.push_back(N->getOperand(4));
4601 Ops.push_back(Chain);
4603 case NVPTXISD::Suld2DArrayI32Zero:
4604 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4605 Ops.push_back(TexHandle);
4606 Ops.push_back(N->getOperand(2));
4607 Ops.push_back(N->getOperand(3));
4608 Ops.push_back(N->getOperand(4));
4609 Ops.push_back(Chain);
4611 case NVPTXISD::Suld2DArrayI64Zero:
4612 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4613 Ops.push_back(TexHandle);
4614 Ops.push_back(N->getOperand(2));
4615 Ops.push_back(N->getOperand(3));
4616 Ops.push_back(N->getOperand(4));
4617 Ops.push_back(Chain);
4619 case NVPTXISD::Suld2DArrayV2I8Zero:
4620 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4621 Ops.push_back(TexHandle);
4622 Ops.push_back(N->getOperand(2));
4623 Ops.push_back(N->getOperand(3));
4624 Ops.push_back(N->getOperand(4));
4625 Ops.push_back(Chain);
4627 case NVPTXISD::Suld2DArrayV2I16Zero:
4628 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4629 Ops.push_back(TexHandle);
4630 Ops.push_back(N->getOperand(2));
4631 Ops.push_back(N->getOperand(3));
4632 Ops.push_back(N->getOperand(4));
4633 Ops.push_back(Chain);
4635 case NVPTXISD::Suld2DArrayV2I32Zero:
4636 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4637 Ops.push_back(TexHandle);
4638 Ops.push_back(N->getOperand(2));
4639 Ops.push_back(N->getOperand(3));
4640 Ops.push_back(N->getOperand(4));
4641 Ops.push_back(Chain);
4643 case NVPTXISD::Suld2DArrayV2I64Zero:
4644 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4645 Ops.push_back(TexHandle);
4646 Ops.push_back(N->getOperand(2));
4647 Ops.push_back(N->getOperand(3));
4648 Ops.push_back(N->getOperand(4));
4649 Ops.push_back(Chain);
4651 case NVPTXISD::Suld2DArrayV4I8Zero:
4652 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4653 Ops.push_back(TexHandle);
4654 Ops.push_back(N->getOperand(2));
4655 Ops.push_back(N->getOperand(3));
4656 Ops.push_back(N->getOperand(4));
4657 Ops.push_back(Chain);
4659 case NVPTXISD::Suld2DArrayV4I16Zero:
4660 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4661 Ops.push_back(TexHandle);
4662 Ops.push_back(N->getOperand(2));
4663 Ops.push_back(N->getOperand(3));
4664 Ops.push_back(N->getOperand(4));
4665 Ops.push_back(Chain);
4667 case NVPTXISD::Suld2DArrayV4I32Zero:
4668 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4669 Ops.push_back(TexHandle);
4670 Ops.push_back(N->getOperand(2));
4671 Ops.push_back(N->getOperand(3));
4672 Ops.push_back(N->getOperand(4));
4673 Ops.push_back(Chain);
4675 case NVPTXISD::Suld3DI8Zero:
4676 Opc = NVPTX::SULD_3D_I8_ZERO;
4677 Ops.push_back(TexHandle);
4678 Ops.push_back(N->getOperand(2));
4679 Ops.push_back(N->getOperand(3));
4680 Ops.push_back(N->getOperand(4));
4681 Ops.push_back(Chain);
4683 case NVPTXISD::Suld3DI16Zero:
4684 Opc = NVPTX::SULD_3D_I16_ZERO;
4685 Ops.push_back(TexHandle);
4686 Ops.push_back(N->getOperand(2));
4687 Ops.push_back(N->getOperand(3));
4688 Ops.push_back(N->getOperand(4));
4689 Ops.push_back(Chain);
4691 case NVPTXISD::Suld3DI32Zero:
4692 Opc = NVPTX::SULD_3D_I32_ZERO;
4693 Ops.push_back(TexHandle);
4694 Ops.push_back(N->getOperand(2));
4695 Ops.push_back(N->getOperand(3));
4696 Ops.push_back(N->getOperand(4));
4697 Ops.push_back(Chain);
4699 case NVPTXISD::Suld3DI64Zero:
4700 Opc = NVPTX::SULD_3D_I64_ZERO;
4701 Ops.push_back(TexHandle);
4702 Ops.push_back(N->getOperand(2));
4703 Ops.push_back(N->getOperand(3));
4704 Ops.push_back(N->getOperand(4));
4705 Ops.push_back(Chain);
4707 case NVPTXISD::Suld3DV2I8Zero:
4708 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4709 Ops.push_back(TexHandle);
4710 Ops.push_back(N->getOperand(2));
4711 Ops.push_back(N->getOperand(3));
4712 Ops.push_back(N->getOperand(4));
4713 Ops.push_back(Chain);
4715 case NVPTXISD::Suld3DV2I16Zero:
4716 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4717 Ops.push_back(TexHandle);
4718 Ops.push_back(N->getOperand(2));
4719 Ops.push_back(N->getOperand(3));
4720 Ops.push_back(N->getOperand(4));
4721 Ops.push_back(Chain);
4723 case NVPTXISD::Suld3DV2I32Zero:
4724 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4725 Ops.push_back(TexHandle);
4726 Ops.push_back(N->getOperand(2));
4727 Ops.push_back(N->getOperand(3));
4728 Ops.push_back(N->getOperand(4));
4729 Ops.push_back(Chain);
4731 case NVPTXISD::Suld3DV2I64Zero:
4732 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4733 Ops.push_back(TexHandle);
4734 Ops.push_back(N->getOperand(2));
4735 Ops.push_back(N->getOperand(3));
4736 Ops.push_back(N->getOperand(4));
4737 Ops.push_back(Chain);
4739 case NVPTXISD::Suld3DV4I8Zero:
4740 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4741 Ops.push_back(TexHandle);
4742 Ops.push_back(N->getOperand(2));
4743 Ops.push_back(N->getOperand(3));
4744 Ops.push_back(N->getOperand(4));
4745 Ops.push_back(Chain);
4747 case NVPTXISD::Suld3DV4I16Zero:
4748 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4749 Ops.push_back(TexHandle);
4750 Ops.push_back(N->getOperand(2));
4751 Ops.push_back(N->getOperand(3));
4752 Ops.push_back(N->getOperand(4));
4753 Ops.push_back(Chain);
4755 case NVPTXISD::Suld3DV4I32Zero:
4756 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4757 Ops.push_back(TexHandle);
4758 Ops.push_back(N->getOperand(2));
4759 Ops.push_back(N->getOperand(3));
4760 Ops.push_back(N->getOperand(4));
4761 Ops.push_back(Chain);
4764 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4769 /// SelectBFE - Look for instruction sequences that can be made more efficient
4770 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// The visible code inspects N when it is an AND, SRL or SRA node and, on
/// the paths shown, ends by creating a machine node whose opcode is one of
/// BFE_S32rii / BFE_U32rii / BFE_S64rii / BFE_U64rii.
///
/// NOTE(review): this listing omits lines (see the gaps in the embedded
/// numbering). The statements that follow most "do not handle" / "must be
/// constant" comments, and the declarations of DL, Val, Start, Len, Opc,
/// Ops, NumZeros and the second NumBits, are not visible here. Confirm
/// against the complete file before relying on the notes below.
4771 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4773 SDValue LHS = N->getOperand(0);
4774 SDValue RHS = N->getOperand(1);
4778 bool IsSigned = false;
// Case 1: N is an AND. The code below looks for
// (and (srl/sra val, start), mask) with a constant low-bit mask.
4780 if (N->getOpcode() == ISD::AND) {
4781 // Canonicalize the operands
4782 // We want 'and %val, %mask'
4783 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4784 std::swap(LHS, RHS);
4787 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4789 // We need a constant mask on the RHS of the AND
4793 // Extract the mask bits
4794 uint64_t MaskVal = Mask->getZExtValue();
4795 if (!isMask_64(MaskVal)) {
4796 // We *could* handle shifted masks here, but doing so would require an
4797 // 'and' operation to fix up the low-order bits so we would trade
4798 // shr+and for bfe+and, which has the same throughput
4802 // How many bits are in our mask?
4803 uint64_t NumBits = countTrailingOnes(MaskVal);
// The field length is the number of trailing one bits of the mask.
4804 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4806 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4807 // We have a 'srl/and' pair, extract the effective start bit and length
4808 Val = LHS.getNode()->getOperand(0);
4809 Start = LHS.getNode()->getOperand(1);
4810 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4812 uint64_t StartVal = StartConst->getZExtValue();
4813 // How many "good" bits do we have left? "good" is defined here as bits
4814 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of Start (the shift-amount
// operand), not of Val (the value being shifted). Confirm the two types
// always have the same width at this point.
4815 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4816 if (NumBits > GoodBits) {
4817 // Do not handle the case where bits have been shifted in. In theory
4818 // we could handle this, but the cost is likely higher than just
4819 // emitting the srl/and pair.
// Re-materialize the constant shift amount as an i32 target constant.
4822 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4824 // Do not handle the case where the shift amount (can be zero if no srl
4825 // was found) is not constant. We could handle this case, but it would
4826 // require run-time logic that would be more expensive than just
4827 // emitting the srl/and pair.
4831 // Do not handle the case where the LHS of the and is not a shift. While
4832 // it would be trivial to handle this case, it would just transform
4833 // 'and' -> 'bfe', but 'and' has higher-throughput.
// Case 2: N is a right shift whose first operand is either an AND or a SHL.
4836 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4837 if (LHS->getOpcode() == ISD::AND) {
4838 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4840 // Shift amount must be constant
4844 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4846 SDValue AndLHS = LHS->getOperand(0);
4847 SDValue AndRHS = LHS->getOperand(1);
4849 // Canonicalize the AND to have the mask on the RHS
4850 if (isa<ConstantSDNode>(AndLHS)) {
4851 std::swap(AndLHS, AndRHS);
4854 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4856 // Mask must be constant
4860 uint64_t MaskVal = MaskCnst->getZExtValue();
4863 if (isMask_64(MaskVal)) {
4865 // The number of bits in the result bitfield will be the number of
4866 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): ShiftAmt is uint64_t, so this subtraction looks like it
// wraps when ShiftAmt exceeds the trailing-ones count. No guard for that
// is visible in this listing; confirm it cannot happen.
4867 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4868 } else if (isShiftedMask_64(MaskVal)) {
4869 NumZeros = countTrailingZeros(MaskVal);
4870 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4871 // The number of bits in the result bitfield will be the number of
4872 // trailing zeros plus the number of set bits in the mask minus the
4873 // number of bits we shift off
4874 NumBits = NumZeros + NumOnes - ShiftAmt;
4876 // This is not a mask we can handle
4880 if (ShiftAmt < NumZeros) {
4881 // Handling this case would require extra logic that would make this
4882 // transformation non-profitable
// The extraction starts at the shift amount and spans NumBits bits.
4887 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4888 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4889 } else if (LHS->getOpcode() == ISD::SHL) {
4890 // Here, we have a pattern like:
4892 // (sra (shl val, NN), MM)
4894 // (srl (shl val, NN), MM)
4896 // If MM >= NN, we can efficiently optimize this with bfe
4897 Val = LHS->getOperand(0);
4899 SDValue ShlRHS = LHS->getOperand(1);
4900 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4902 // Shift amount must be constant
4905 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4907 SDValue ShrRHS = RHS;
4908 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4910 // Shift amount must be constant
4913 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4915 // To avoid extra codegen and be profitable, we need Outer >= Inner
4916 if (OuterShiftAmt < InnerShiftAmt) {
4920 // If the outer shift is more than the type size, we have no bitfield to
4921 // extract (since we also check that the inner shift is <= the outer shift
4922 // then this also implies that the inner shift is < the type size)
4923 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// NOTE(review): the assignment targets of the next two getTargetConstant
// calls are not visible in this listing. By position they are presumably
// Start (outer minus inner shift) and Len (type width minus outer shift).
// Confirm against the complete file.
4928 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4930 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4931 OuterShiftAmt, DL, MVT::i32);
4933 if (N->getOpcode() == ISD::SRA) {
4934 // If we have a arithmetic right shift, we need to use the signed bfe
4949 // For the BFE operations we form here from "and" and "srl", always use the
4950 // unsigned variants.
// NOTE(review): both signed (BFE_S*) and unsigned (BFE_U*) opcodes are
// assigned below, so the "always unsigned" remark above looks stale. The
// condition choosing between them is not visible here (presumably
// IsSigned). The opcode family is chosen by the type of Val: i32 or i64.
4951 if (Val.getValueType() == MVT::i32) {
4953 Opc = NVPTX::BFE_S32rii;
4955 Opc = NVPTX::BFE_U32rii;
4957 } else if (Val.getValueType() == MVT::i64) {
4959 Opc = NVPTX::BFE_S64rii;
4961 Opc = NVPTX::BFE_U64rii;
4964 // We cannot handle this type
// Build the selected BFE machine node with N's own result types.
4972 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4975 // SelectDirectAddr - Match a direct address for DAG.
4976 // A direct address could be a globaladdress or externalsymbol.
//
// Besides TargetGlobalAddress / TargetExternalSymbol nodes, the visible code
// also looks through an NVPTXISD::Wrapper node (Address becomes its operand
// 0) and through the nvvm_ptr_gen_to_param intrinsic when its argument is an
// NVPTXISD::MoveParam node, in which case the MoveParam's operand 0 is
// matched recursively.
// NOTE(review): the statements executed for the first case and the return
// statements are not visible in this listing.
4977 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4978 // Return true if TGA or ES.
4979 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4980 N.getOpcode() == ISD::TargetExternalSymbol) {
// A wrapper node: the wrapped operand is the address.
4984 if (N.getOpcode() == NVPTXISD::Wrapper) {
4985 Address = N.getOperand(0);
// Operand 0 of an INTRINSIC_WO_CHAIN node is read as the intrinsic ID.
4988 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4989 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4990 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4991 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4992 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// SelectADDRsi_imp - Match an address of the form (add base, constant) where
// the first ADD operand is accepted by SelectDirectAddr. On a match, Base
// receives the direct address and Offset a target constant built from the
// constant operand.
// NOTE(review): the remaining arguments of the getTargetConstant call
// (presumably the value type mvt) and the return statements are not visible
// in this listing.
4998 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4999 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5000 if (Addr.getOpcode() == ISD::ADD) {
// Only a constant second operand qualifies as the offset.
5001 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5002 SDValue base = Addr.getOperand(0);
5003 if (SelectDirectAddr(base, Base)) {
5004 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRsi - Forwards to SelectADDRsi_imp, passing MVT::i32 as the value
// type argument, and returns its result.
5014 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5015 SDValue &Base, SDValue &Offset) {
5016 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - Forwards to SelectADDRsi_imp, passing MVT::i64 as the
// value type argument, and returns its result.
5020 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5021 SDValue &Base, SDValue &Offset) {
5022 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Match a base + constant-offset address and fill in Base
// and Offset. The visible cases are:
//   * Addr is a frame index: Base is the target frame index and Offset is a
//     zero target constant, both of type mvt.
//   * Addr is a TargetExternalSymbol or TargetGlobalAddress: not matched.
//   * Addr is an ADD with a constant second operand: Base is the first
//     operand (as a target frame index when it is a frame index) and Offset
//     is built from the constant.
// NOTE(review): most return statements, an `else`, and the tail of the last
// getTargetConstant call are not visible in this listing.
5026 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5027 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5028 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5029 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5030 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5033 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5034 Addr.getOpcode() == ISD::TargetGlobalAddress)
5035 return false; // direct calls.
5037 if (Addr.getOpcode() == ISD::ADD) {
// NOTE(review): Addr itself is passed as the output argument, so a
// successful SelectDirectAddr overwrites Addr. The statement executed on
// success is not visible here (presumably the match is rejected); confirm.
5038 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5041 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5042 if (FrameIndexSDNode *FIN =
5043 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5044 // Constant offset from frame ref.
5045 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// Presumably the else branch (the `else` line is missing from this listing):
// any other first operand is used as the base unchanged.
5047 Base = Addr.getOperand(0);
5048 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRri - Forwards to SelectADDRri_imp, passing MVT::i32 as the value
// type argument, and returns its result.
5057 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5058 SDValue &Base, SDValue &Offset) {
5059 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - Forwards to SelectADDRri_imp, passing MVT::i64 as the
// value type argument, and returns its result.
5063 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5064 SDValue &Base, SDValue &Offset) {
5065 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Check the address space of the memory accessed
// by N against spN. From the visible lines: when N is a MemSDNode, Src is
// taken from the IR value of its memory operand. When Src has pointer type,
// the result is whether that pointer's address space equals spN.
// NOTE(review): the statement executed when spN == 0 and the memory operand
// has a pseudo source value, any check on Src before it is dereferenced, and
// the fall-through results are not visible in this listing.
5068 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5069 unsigned int spN) const {
5070 const Value *Src = nullptr;
5071 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
// Special case for address space 0 with a pseudo source value.
5072 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5074 Src = mN->getMemOperand()->getValue();
// Compare the pointer's address space with the requested one.
5078 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5079 return (PT->getAddressSpace() == spN);
5083 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5084 /// inline asm expressions.
5085 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5086 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5088 switch (ConstraintID) {
5091 case InlineAsm::Constraint_m: // memory
5092 if (SelectDirectAddr(Op, Op0)) {
5093 OutOps.push_back(Op0);
5094 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5097 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5098 OutOps.push_back(Op0);
5099 OutOps.push_back(Op1);