1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line knobs for F32 division precision, F32 square-root
// precision and F32 flush-to-zero. The selector queries further down
// (getDivF32Level, usePrecSqrtF32, useF32FTZ) test getNumOccurrences() on
// these first, so an explicit command-line use is checked before anything
// else.
// NOTE(review): this listing omits some lines of these declarations (the
// heads of the last two options and whatever follows each cl::desc).
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
// NOTE(review): "Specifies" below looks like a typo for "Specific" (compare
// the two sibling descriptions); it is runtime help text, so it is left as is.
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
// Square-root precision selector: 0 = sqrt.approx, 1 = sqrt.rn.
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Flush-to-zero switch for f32 subnormals.
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// \param TM       target machine forwarded to the selector's constructor.
/// \param OptLevel optimization level forwarded to the selector's constructor.
/// \returns a heap-allocated NVPTXDAGToDAGISel (created with `new`).
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Builds the selector: forwards \p tm and \p OptLevel to the
/// SelectionDAGISel base, caches the NVPTX subtarget taken from \p tm, and
/// derives the doMulWide flag from the optimization level.
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel),
54 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// doMulWide is on for every optimization level above 0.
// NOTE(review): presumably gates use of wide-multiply patterns; its consumers
// are not visible in this listing -- confirm.
55 doMulWide = (OptLevel > 0);
/// Returns the F32 division precision level to select for. Per the
/// nvptx-prec-divf32 option text the levels are: 0 = div.approx,
/// 1 = div.full, 2 = IEEE-compliant div.rnd (if available).
/// NOTE(review): the return statements of this function are not part of this
/// listing; the existing comments below describe the intended policy.
58 int NVPTXDAGToDAGISel::getDivF32Level() const {
59 if (UsePrecDivF32.getNumOccurrences() > 0) {
60 // If nvptx-prec-divf32=N is used on the command-line, always honor it
63 // Otherwise, use div.approx if fast math is enabled
64 if (TM.Options.UnsafeFPMath)
/// Tells whether the precise F32 square root (sqrt.rn) should be used instead
/// of sqrt.approx. An explicit nvptx-prec-sqrtf32 on the command line wins;
/// otherwise the decision depends on TM.Options.UnsafeFPMath.
/// NOTE(review): the return statements after the UnsafeFPMath test are not
/// part of this listing.
71 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
72 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
73 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
74 return UsePrecSqrtF32;
76 // Otherwise, use sqrt.approx if fast math is enabled
77 if (TM.Options.UnsafeFPMath)
/// Tells whether f32 subnormals should be flushed to zero. The command-line
/// option nvptx-f32ftz is checked first; after that the current function's
/// "nvptx-f32ftz" attribute is consulted.
/// NOTE(review): the return for the command-line case and the final fallback
/// return are not part of this listing.
84 bool NVPTXDAGToDAGISel::useF32FTZ() const {
85 if (FtzEnabled.getNumOccurrences() > 0) {
86 // If nvptx-f32ftz is used on the command-line, always honor it
89 const Function *F = MF->getFunction();
90 // Otherwise, check for an nvptx-f32ftz attribute on the function
91 if (F->hasFnAttribute("nvptx-f32ftz"))
// The attribute enables FTZ only when its string value is exactly "true".
92 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
94 .getValueAsString() == "true");
/// Tells whether FMA contraction is allowed for the current function by
/// delegating to NVPTXTargetLowering::allowFMA with the current
/// MachineFunction and optimization level.
100 bool NVPTXDAGToDAGISel::allowFMA() const {
101 const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
102 return TL->allowFMA(*MF, OptLevel);
105 /// Select - Select instructions not customized! Used for
106 /// expanded, promoted and normal instructions.
///
/// Dispatches on the node's opcode to the hand-written selectors below
/// (loads, stores, parameter/return-value traffic, intrinsics, texture and
/// surface operations, BFE, address-space casts). The result of the chosen
/// selector is kept in ResNode; SelectCode(N) is the last statement of the
/// function.
/// NOTE(review): this listing omits several lines of the function (some case
/// labels, the break statements and the code between the switch and the final
/// return); presumably SelectCode is reached only when no custom selector
/// produced a node -- confirm.
107 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
109 if (N->isMachineOpcode()) {
111 return nullptr; // Already selected.
114 SDNode *ResNode = nullptr;
115 switch (N->getOpcode()) {
// NOTE(review): the case labels for the next two assignments are not shown.
117 ResNode = SelectLoad(N);
120 ResNode = SelectStore(N);
// Custom vector load nodes.
122 case NVPTXISD::LoadV2:
123 case NVPTXISD::LoadV4:
124 ResNode = SelectLoadVector(N);
// LDG/LDU vector nodes.
126 case NVPTXISD::LDGV2:
127 case NVPTXISD::LDGV4:
128 case NVPTXISD::LDUV2:
129 case NVPTXISD::LDUV4:
130 ResNode = SelectLDGLDU(N);
// Custom vector store nodes.
132 case NVPTXISD::StoreV2:
133 case NVPTXISD::StoreV4:
134 ResNode = SelectStoreVector(N);
// Parameter loads.
136 case NVPTXISD::LoadParam:
137 case NVPTXISD::LoadParamV2:
138 case NVPTXISD::LoadParamV4:
139 ResNode = SelectLoadParam(N);
// Return-value stores.
141 case NVPTXISD::StoreRetval:
142 case NVPTXISD::StoreRetvalV2:
143 case NVPTXISD::StoreRetvalV4:
144 ResNode = SelectStoreRetval(N);
// Parameter stores.
146 case NVPTXISD::StoreParam:
147 case NVPTXISD::StoreParamV2:
148 case NVPTXISD::StoreParamV4:
149 case NVPTXISD::StoreParamS32:
150 case NVPTXISD::StoreParamU32:
151 ResNode = SelectStoreParam(N);
// Intrinsics without and with a chain operand.
153 case ISD::INTRINSIC_WO_CHAIN:
154 ResNode = SelectIntrinsicNoChain(N);
156 case ISD::INTRINSIC_W_CHAIN:
157 ResNode = SelectIntrinsicChain(N);
// Texture fetches: the Tex*/Tld4* family and the TexUnified*/Tld4Unified*
// family all go through SelectTextureIntrinsic.
159 case NVPTXISD::Tex1DFloatS32:
160 case NVPTXISD::Tex1DFloatFloat:
161 case NVPTXISD::Tex1DFloatFloatLevel:
162 case NVPTXISD::Tex1DFloatFloatGrad:
163 case NVPTXISD::Tex1DS32S32:
164 case NVPTXISD::Tex1DS32Float:
165 case NVPTXISD::Tex1DS32FloatLevel:
166 case NVPTXISD::Tex1DS32FloatGrad:
167 case NVPTXISD::Tex1DU32S32:
168 case NVPTXISD::Tex1DU32Float:
169 case NVPTXISD::Tex1DU32FloatLevel:
170 case NVPTXISD::Tex1DU32FloatGrad:
171 case NVPTXISD::Tex1DArrayFloatS32:
172 case NVPTXISD::Tex1DArrayFloatFloat:
173 case NVPTXISD::Tex1DArrayFloatFloatLevel:
174 case NVPTXISD::Tex1DArrayFloatFloatGrad:
175 case NVPTXISD::Tex1DArrayS32S32:
176 case NVPTXISD::Tex1DArrayS32Float:
177 case NVPTXISD::Tex1DArrayS32FloatLevel:
178 case NVPTXISD::Tex1DArrayS32FloatGrad:
179 case NVPTXISD::Tex1DArrayU32S32:
180 case NVPTXISD::Tex1DArrayU32Float:
181 case NVPTXISD::Tex1DArrayU32FloatLevel:
182 case NVPTXISD::Tex1DArrayU32FloatGrad:
183 case NVPTXISD::Tex2DFloatS32:
184 case NVPTXISD::Tex2DFloatFloat:
185 case NVPTXISD::Tex2DFloatFloatLevel:
186 case NVPTXISD::Tex2DFloatFloatGrad:
187 case NVPTXISD::Tex2DS32S32:
188 case NVPTXISD::Tex2DS32Float:
189 case NVPTXISD::Tex2DS32FloatLevel:
190 case NVPTXISD::Tex2DS32FloatGrad:
191 case NVPTXISD::Tex2DU32S32:
192 case NVPTXISD::Tex2DU32Float:
193 case NVPTXISD::Tex2DU32FloatLevel:
194 case NVPTXISD::Tex2DU32FloatGrad:
195 case NVPTXISD::Tex2DArrayFloatS32:
196 case NVPTXISD::Tex2DArrayFloatFloat:
197 case NVPTXISD::Tex2DArrayFloatFloatLevel:
198 case NVPTXISD::Tex2DArrayFloatFloatGrad:
199 case NVPTXISD::Tex2DArrayS32S32:
200 case NVPTXISD::Tex2DArrayS32Float:
201 case NVPTXISD::Tex2DArrayS32FloatLevel:
202 case NVPTXISD::Tex2DArrayS32FloatGrad:
203 case NVPTXISD::Tex2DArrayU32S32:
204 case NVPTXISD::Tex2DArrayU32Float:
205 case NVPTXISD::Tex2DArrayU32FloatLevel:
206 case NVPTXISD::Tex2DArrayU32FloatGrad:
207 case NVPTXISD::Tex3DFloatS32:
208 case NVPTXISD::Tex3DFloatFloat:
209 case NVPTXISD::Tex3DFloatFloatLevel:
210 case NVPTXISD::Tex3DFloatFloatGrad:
211 case NVPTXISD::Tex3DS32S32:
212 case NVPTXISD::Tex3DS32Float:
213 case NVPTXISD::Tex3DS32FloatLevel:
214 case NVPTXISD::Tex3DS32FloatGrad:
215 case NVPTXISD::Tex3DU32S32:
216 case NVPTXISD::Tex3DU32Float:
217 case NVPTXISD::Tex3DU32FloatLevel:
218 case NVPTXISD::Tex3DU32FloatGrad:
219 case NVPTXISD::TexCubeFloatFloat:
220 case NVPTXISD::TexCubeFloatFloatLevel:
221 case NVPTXISD::TexCubeS32Float:
222 case NVPTXISD::TexCubeS32FloatLevel:
223 case NVPTXISD::TexCubeU32Float:
224 case NVPTXISD::TexCubeU32FloatLevel:
225 case NVPTXISD::TexCubeArrayFloatFloat:
226 case NVPTXISD::TexCubeArrayFloatFloatLevel:
227 case NVPTXISD::TexCubeArrayS32Float:
228 case NVPTXISD::TexCubeArrayS32FloatLevel:
229 case NVPTXISD::TexCubeArrayU32Float:
230 case NVPTXISD::TexCubeArrayU32FloatLevel:
231 case NVPTXISD::Tld4R2DFloatFloat:
232 case NVPTXISD::Tld4G2DFloatFloat:
233 case NVPTXISD::Tld4B2DFloatFloat:
234 case NVPTXISD::Tld4A2DFloatFloat:
235 case NVPTXISD::Tld4R2DS64Float:
236 case NVPTXISD::Tld4G2DS64Float:
237 case NVPTXISD::Tld4B2DS64Float:
238 case NVPTXISD::Tld4A2DS64Float:
239 case NVPTXISD::Tld4R2DU64Float:
240 case NVPTXISD::Tld4G2DU64Float:
241 case NVPTXISD::Tld4B2DU64Float:
242 case NVPTXISD::Tld4A2DU64Float:
243 case NVPTXISD::TexUnified1DFloatS32:
244 case NVPTXISD::TexUnified1DFloatFloat:
245 case NVPTXISD::TexUnified1DFloatFloatLevel:
246 case NVPTXISD::TexUnified1DFloatFloatGrad:
247 case NVPTXISD::TexUnified1DS32S32:
248 case NVPTXISD::TexUnified1DS32Float:
249 case NVPTXISD::TexUnified1DS32FloatLevel:
250 case NVPTXISD::TexUnified1DS32FloatGrad:
251 case NVPTXISD::TexUnified1DU32S32:
252 case NVPTXISD::TexUnified1DU32Float:
253 case NVPTXISD::TexUnified1DU32FloatLevel:
254 case NVPTXISD::TexUnified1DU32FloatGrad:
255 case NVPTXISD::TexUnified1DArrayFloatS32:
256 case NVPTXISD::TexUnified1DArrayFloatFloat:
257 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
258 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
259 case NVPTXISD::TexUnified1DArrayS32S32:
260 case NVPTXISD::TexUnified1DArrayS32Float:
261 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
262 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
263 case NVPTXISD::TexUnified1DArrayU32S32:
264 case NVPTXISD::TexUnified1DArrayU32Float:
265 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
266 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
267 case NVPTXISD::TexUnified2DFloatS32:
268 case NVPTXISD::TexUnified2DFloatFloat:
269 case NVPTXISD::TexUnified2DFloatFloatLevel:
270 case NVPTXISD::TexUnified2DFloatFloatGrad:
271 case NVPTXISD::TexUnified2DS32S32:
272 case NVPTXISD::TexUnified2DS32Float:
273 case NVPTXISD::TexUnified2DS32FloatLevel:
274 case NVPTXISD::TexUnified2DS32FloatGrad:
275 case NVPTXISD::TexUnified2DU32S32:
276 case NVPTXISD::TexUnified2DU32Float:
277 case NVPTXISD::TexUnified2DU32FloatLevel:
278 case NVPTXISD::TexUnified2DU32FloatGrad:
279 case NVPTXISD::TexUnified2DArrayFloatS32:
280 case NVPTXISD::TexUnified2DArrayFloatFloat:
281 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
282 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
283 case NVPTXISD::TexUnified2DArrayS32S32:
284 case NVPTXISD::TexUnified2DArrayS32Float:
285 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
286 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
287 case NVPTXISD::TexUnified2DArrayU32S32:
288 case NVPTXISD::TexUnified2DArrayU32Float:
289 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
290 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
291 case NVPTXISD::TexUnified3DFloatS32:
292 case NVPTXISD::TexUnified3DFloatFloat:
293 case NVPTXISD::TexUnified3DFloatFloatLevel:
294 case NVPTXISD::TexUnified3DFloatFloatGrad:
295 case NVPTXISD::TexUnified3DS32S32:
296 case NVPTXISD::TexUnified3DS32Float:
297 case NVPTXISD::TexUnified3DS32FloatLevel:
298 case NVPTXISD::TexUnified3DS32FloatGrad:
299 case NVPTXISD::TexUnified3DU32S32:
300 case NVPTXISD::TexUnified3DU32Float:
301 case NVPTXISD::TexUnified3DU32FloatLevel:
302 case NVPTXISD::TexUnified3DU32FloatGrad:
303 case NVPTXISD::TexUnifiedCubeFloatFloat:
304 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
305 case NVPTXISD::TexUnifiedCubeS32Float:
306 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
307 case NVPTXISD::TexUnifiedCubeU32Float:
308 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
309 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
310 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
311 case NVPTXISD::TexUnifiedCubeArrayS32Float:
312 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
313 case NVPTXISD::TexUnifiedCubeArrayU32Float:
314 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
315 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
316 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
319 case NVPTXISD::Tld4UnifiedR2DS64Float:
320 case NVPTXISD::Tld4UnifiedG2DS64Float:
321 case NVPTXISD::Tld4UnifiedB2DS64Float:
322 case NVPTXISD::Tld4UnifiedA2DS64Float:
323 case NVPTXISD::Tld4UnifiedR2DU64Float:
324 case NVPTXISD::Tld4UnifiedG2DU64Float:
325 case NVPTXISD::Tld4UnifiedB2DU64Float:
326 case NVPTXISD::Tld4UnifiedA2DU64Float:
327 ResNode = SelectTextureIntrinsic(N);
// Surface loads in their Clamp, Trap and Zero flavours all go through
// SelectSurfaceIntrinsic.
329 case NVPTXISD::Suld1DI8Clamp:
330 case NVPTXISD::Suld1DI16Clamp:
331 case NVPTXISD::Suld1DI32Clamp:
332 case NVPTXISD::Suld1DI64Clamp:
333 case NVPTXISD::Suld1DV2I8Clamp:
334 case NVPTXISD::Suld1DV2I16Clamp:
335 case NVPTXISD::Suld1DV2I32Clamp:
336 case NVPTXISD::Suld1DV2I64Clamp:
337 case NVPTXISD::Suld1DV4I8Clamp:
338 case NVPTXISD::Suld1DV4I16Clamp:
339 case NVPTXISD::Suld1DV4I32Clamp:
340 case NVPTXISD::Suld1DArrayI8Clamp:
341 case NVPTXISD::Suld1DArrayI16Clamp:
342 case NVPTXISD::Suld1DArrayI32Clamp:
343 case NVPTXISD::Suld1DArrayI64Clamp:
344 case NVPTXISD::Suld1DArrayV2I8Clamp:
345 case NVPTXISD::Suld1DArrayV2I16Clamp:
346 case NVPTXISD::Suld1DArrayV2I32Clamp:
347 case NVPTXISD::Suld1DArrayV2I64Clamp:
348 case NVPTXISD::Suld1DArrayV4I8Clamp:
349 case NVPTXISD::Suld1DArrayV4I16Clamp:
350 case NVPTXISD::Suld1DArrayV4I32Clamp:
351 case NVPTXISD::Suld2DI8Clamp:
352 case NVPTXISD::Suld2DI16Clamp:
353 case NVPTXISD::Suld2DI32Clamp:
354 case NVPTXISD::Suld2DI64Clamp:
355 case NVPTXISD::Suld2DV2I8Clamp:
356 case NVPTXISD::Suld2DV2I16Clamp:
357 case NVPTXISD::Suld2DV2I32Clamp:
358 case NVPTXISD::Suld2DV2I64Clamp:
359 case NVPTXISD::Suld2DV4I8Clamp:
360 case NVPTXISD::Suld2DV4I16Clamp:
361 case NVPTXISD::Suld2DV4I32Clamp:
362 case NVPTXISD::Suld2DArrayI8Clamp:
363 case NVPTXISD::Suld2DArrayI16Clamp:
364 case NVPTXISD::Suld2DArrayI32Clamp:
365 case NVPTXISD::Suld2DArrayI64Clamp:
366 case NVPTXISD::Suld2DArrayV2I8Clamp:
367 case NVPTXISD::Suld2DArrayV2I16Clamp:
368 case NVPTXISD::Suld2DArrayV2I32Clamp:
369 case NVPTXISD::Suld2DArrayV2I64Clamp:
370 case NVPTXISD::Suld2DArrayV4I8Clamp:
371 case NVPTXISD::Suld2DArrayV4I16Clamp:
372 case NVPTXISD::Suld2DArrayV4I32Clamp:
373 case NVPTXISD::Suld3DI8Clamp:
374 case NVPTXISD::Suld3DI16Clamp:
375 case NVPTXISD::Suld3DI32Clamp:
376 case NVPTXISD::Suld3DI64Clamp:
377 case NVPTXISD::Suld3DV2I8Clamp:
378 case NVPTXISD::Suld3DV2I16Clamp:
379 case NVPTXISD::Suld3DV2I32Clamp:
380 case NVPTXISD::Suld3DV2I64Clamp:
381 case NVPTXISD::Suld3DV4I8Clamp:
382 case NVPTXISD::Suld3DV4I16Clamp:
383 case NVPTXISD::Suld3DV4I32Clamp:
384 case NVPTXISD::Suld1DI8Trap:
385 case NVPTXISD::Suld1DI16Trap:
386 case NVPTXISD::Suld1DI32Trap:
387 case NVPTXISD::Suld1DI64Trap:
388 case NVPTXISD::Suld1DV2I8Trap:
389 case NVPTXISD::Suld1DV2I16Trap:
390 case NVPTXISD::Suld1DV2I32Trap:
391 case NVPTXISD::Suld1DV2I64Trap:
392 case NVPTXISD::Suld1DV4I8Trap:
393 case NVPTXISD::Suld1DV4I16Trap:
394 case NVPTXISD::Suld1DV4I32Trap:
395 case NVPTXISD::Suld1DArrayI8Trap:
396 case NVPTXISD::Suld1DArrayI16Trap:
397 case NVPTXISD::Suld1DArrayI32Trap:
398 case NVPTXISD::Suld1DArrayI64Trap:
399 case NVPTXISD::Suld1DArrayV2I8Trap:
400 case NVPTXISD::Suld1DArrayV2I16Trap:
401 case NVPTXISD::Suld1DArrayV2I32Trap:
402 case NVPTXISD::Suld1DArrayV2I64Trap:
403 case NVPTXISD::Suld1DArrayV4I8Trap:
404 case NVPTXISD::Suld1DArrayV4I16Trap:
405 case NVPTXISD::Suld1DArrayV4I32Trap:
406 case NVPTXISD::Suld2DI8Trap:
407 case NVPTXISD::Suld2DI16Trap:
408 case NVPTXISD::Suld2DI32Trap:
409 case NVPTXISD::Suld2DI64Trap:
410 case NVPTXISD::Suld2DV2I8Trap:
411 case NVPTXISD::Suld2DV2I16Trap:
412 case NVPTXISD::Suld2DV2I32Trap:
413 case NVPTXISD::Suld2DV2I64Trap:
414 case NVPTXISD::Suld2DV4I8Trap:
415 case NVPTXISD::Suld2DV4I16Trap:
416 case NVPTXISD::Suld2DV4I32Trap:
417 case NVPTXISD::Suld2DArrayI8Trap:
418 case NVPTXISD::Suld2DArrayI16Trap:
419 case NVPTXISD::Suld2DArrayI32Trap:
420 case NVPTXISD::Suld2DArrayI64Trap:
421 case NVPTXISD::Suld2DArrayV2I8Trap:
422 case NVPTXISD::Suld2DArrayV2I16Trap:
423 case NVPTXISD::Suld2DArrayV2I32Trap:
424 case NVPTXISD::Suld2DArrayV2I64Trap:
425 case NVPTXISD::Suld2DArrayV4I8Trap:
426 case NVPTXISD::Suld2DArrayV4I16Trap:
427 case NVPTXISD::Suld2DArrayV4I32Trap:
428 case NVPTXISD::Suld3DI8Trap:
429 case NVPTXISD::Suld3DI16Trap:
430 case NVPTXISD::Suld3DI32Trap:
431 case NVPTXISD::Suld3DI64Trap:
432 case NVPTXISD::Suld3DV2I8Trap:
433 case NVPTXISD::Suld3DV2I16Trap:
434 case NVPTXISD::Suld3DV2I32Trap:
435 case NVPTXISD::Suld3DV2I64Trap:
436 case NVPTXISD::Suld3DV4I8Trap:
437 case NVPTXISD::Suld3DV4I16Trap:
438 case NVPTXISD::Suld3DV4I32Trap:
439 case NVPTXISD::Suld1DI8Zero:
440 case NVPTXISD::Suld1DI16Zero:
441 case NVPTXISD::Suld1DI32Zero:
442 case NVPTXISD::Suld1DI64Zero:
443 case NVPTXISD::Suld1DV2I8Zero:
444 case NVPTXISD::Suld1DV2I16Zero:
445 case NVPTXISD::Suld1DV2I32Zero:
446 case NVPTXISD::Suld1DV2I64Zero:
447 case NVPTXISD::Suld1DV4I8Zero:
448 case NVPTXISD::Suld1DV4I16Zero:
449 case NVPTXISD::Suld1DV4I32Zero:
450 case NVPTXISD::Suld1DArrayI8Zero:
451 case NVPTXISD::Suld1DArrayI16Zero:
452 case NVPTXISD::Suld1DArrayI32Zero:
453 case NVPTXISD::Suld1DArrayI64Zero:
454 case NVPTXISD::Suld1DArrayV2I8Zero:
455 case NVPTXISD::Suld1DArrayV2I16Zero:
456 case NVPTXISD::Suld1DArrayV2I32Zero:
457 case NVPTXISD::Suld1DArrayV2I64Zero:
458 case NVPTXISD::Suld1DArrayV4I8Zero:
459 case NVPTXISD::Suld1DArrayV4I16Zero:
460 case NVPTXISD::Suld1DArrayV4I32Zero:
461 case NVPTXISD::Suld2DI8Zero:
462 case NVPTXISD::Suld2DI16Zero:
463 case NVPTXISD::Suld2DI32Zero:
464 case NVPTXISD::Suld2DI64Zero:
465 case NVPTXISD::Suld2DV2I8Zero:
466 case NVPTXISD::Suld2DV2I16Zero:
467 case NVPTXISD::Suld2DV2I32Zero:
468 case NVPTXISD::Suld2DV2I64Zero:
469 case NVPTXISD::Suld2DV4I8Zero:
470 case NVPTXISD::Suld2DV4I16Zero:
471 case NVPTXISD::Suld2DV4I32Zero:
472 case NVPTXISD::Suld2DArrayI8Zero:
473 case NVPTXISD::Suld2DArrayI16Zero:
474 case NVPTXISD::Suld2DArrayI32Zero:
475 case NVPTXISD::Suld2DArrayI64Zero:
476 case NVPTXISD::Suld2DArrayV2I8Zero:
477 case NVPTXISD::Suld2DArrayV2I16Zero:
478 case NVPTXISD::Suld2DArrayV2I32Zero:
479 case NVPTXISD::Suld2DArrayV2I64Zero:
480 case NVPTXISD::Suld2DArrayV4I8Zero:
481 case NVPTXISD::Suld2DArrayV4I16Zero:
482 case NVPTXISD::Suld2DArrayV4I32Zero:
483 case NVPTXISD::Suld3DI8Zero:
484 case NVPTXISD::Suld3DI16Zero:
485 case NVPTXISD::Suld3DI32Zero:
486 case NVPTXISD::Suld3DI64Zero:
487 case NVPTXISD::Suld3DV2I8Zero:
488 case NVPTXISD::Suld3DV2I16Zero:
489 case NVPTXISD::Suld3DV2I32Zero:
490 case NVPTXISD::Suld3DV2I64Zero:
491 case NVPTXISD::Suld3DV4I8Zero:
492 case NVPTXISD::Suld3DV4I16Zero:
493 case NVPTXISD::Suld3DV4I32Zero:
494 ResNode = SelectSurfaceIntrinsic(N);
// NOTE(review): the case labels leading to SelectBFE are not shown.
500 ResNode = SelectBFE(N);
502 case ISD::ADDRSPACECAST:
503 ResNode = SelectAddrSpaceCast(N);
// Hand the node to SelectCode (defined outside this listing).
510 return SelectCode(N);
/// Custom selection for chained intrinsic nodes. The intrinsic ID is read
/// from operand 1; the nvvm ldg/ldu global intrinsics (f, i and p variants)
/// are forwarded to SelectLDGLDU.
/// NOTE(review): the switch head and its default path are not part of this
/// listing.
513 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
514 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
518 case Intrinsic::nvvm_ldg_global_f:
519 case Intrinsic::nvvm_ldg_global_i:
520 case Intrinsic::nvvm_ldg_global_p:
521 case Intrinsic::nvvm_ldu_global_f:
522 case Intrinsic::nvvm_ldu_global_i:
523 case Intrinsic::nvvm_ldu_global_p:
524 return SelectLDGLDU(N);
/// Maps the memory operand of \p N to a NVPTX::PTXLdStInstCode address-space
/// code. The IR value behind the memory operand is inspected: when it has
/// pointer type, its LLVM address space is translated one-to-one (local,
/// global, shared, generic, param, const); GENERIC is returned on the other
/// visible paths.
/// NOTE(review): the condition guarding the first GENERIC return is not part
/// of this listing, and \p Subtarget is not referenced in the visible lines.
528 static unsigned int getCodeAddrSpace(MemSDNode *N,
529 const NVPTXSubtarget &Subtarget) {
530 const Value *Src = N->getMemOperand()->getValue();
533 return NVPTX::PTXLdStInstCode::GENERIC;
535 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
536 switch (PT->getAddressSpace()) {
537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Final return of the function: GENERIC.
546 return NVPTX::PTXLdStInstCode::GENERIC;
/// Custom selection for chainless intrinsic nodes. The intrinsic ID is read
/// from operand 0; nvvm_texsurf_handle_internal is forwarded to
/// SelectTexSurfHandle.
/// NOTE(review): the switch head and its default path are not part of this
/// listing.
549 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
550 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
554 case Intrinsic::nvvm_texsurf_handle_internal:
555 return SelectTexSurfHandle(N);
/// Selects a texsurf_handles machine node with an i64 result for a
/// texture/surface handle intrinsic. Operand 1 of \p N is a wrapper node
/// whose operand 0 is taken as the global value.
/// NOTE(review): the trailing arguments of the getMachineNode call are not
/// part of this listing (presumably GlobalVal -- confirm).
559 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
560 // Op 0 is the intrinsic ID
561 SDValue Wrapper = N->getOperand(1);
562 SDValue GlobalVal = Wrapper.getOperand(0);
563 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
/// Selects a cvta-style machine node for an ISD::ADDRSPACECAST.
///
/// Two directions are handled: a specific address space (global, shared,
/// const, local) to generic uses the cvta_*_yes opcodes, and generic to a
/// specific address space uses the cvta_to_*_yes opcodes. In both directions
/// the _64 opcode variant is picked when Subtarget.is64Bit(). Any other
/// address space, or a cast between two non-generic spaces, ends in
/// report_fatal_error.
/// NOTE(review): the declaration of Opc, the break statements and the
/// else/closing lines are not part of this listing.
567 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
568 SDValue Src = N->getOperand(0);
569 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
570 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
571 unsigned DstAddrSpace = CastN->getDestAddressSpace();
573 assert(SrcAddrSpace != DstAddrSpace &&
574 "addrspacecast must be between different address spaces");
576 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
577 // Specific to generic
579 switch (SrcAddrSpace) {
580 default: report_fatal_error("Bad address space in addrspacecast");
581 case ADDRESS_SPACE_GLOBAL:
582 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
583 : NVPTX::cvta_global_yes;
585 case ADDRESS_SPACE_SHARED:
586 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
587 : NVPTX::cvta_shared_yes;
589 case ADDRESS_SPACE_CONST:
590 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
591 : NVPTX::cvta_const_yes;
593 case ADDRESS_SPACE_LOCAL:
594 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
595 : NVPTX::cvta_local_yes;
// The new node takes the cast's source pointer as its only operand and keeps
// the result type of the original cast.
598 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
600 // Generic to specific
// NOTE(review): the literal 0 is presumably ADDRESS_SPACE_GENERIC, as the
// error text suggests -- confirm.
601 if (SrcAddrSpace != 0)
602 report_fatal_error("Cannot cast between two non-generic address spaces");
604 switch (DstAddrSpace) {
605 default: report_fatal_error("Bad address space in addrspacecast");
606 case ADDRESS_SPACE_GLOBAL:
607 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
608 : NVPTX::cvta_to_global_yes;
610 case ADDRESS_SPACE_SHARED:
611 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
612 : NVPTX::cvta_to_shared_yes;
614 case ADDRESS_SPACE_CONST:
615 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
616 : NVPTX::cvta_to_const_yes;
618 case ADDRESS_SPACE_LOCAL:
619 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
620 : NVPTX::cvta_to_local_yes;
623 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
/// Custom selection of a scalar load (LoadSDNode) into an NVPTX LD_* machine
/// node.
///
/// The function derives the instruction modifiers (volatile flag, address
/// space code, vector arity, source type kind and source width) and then picks
/// an opcode by addressing form, tried in this order:
///   1. SelectDirectAddr succeeds            -> LD_*_avar
///   2. SelectADDRsi / SelectADDRsi64        -> LD_*_asi
///   3. SelectADDRri / SelectADDRri64        -> LD_*_ari or LD_*_ari_64
///   4. otherwise, N1 itself is the address  -> LD_*_areg or LD_*_areg_64
/// Every form passes the operands in the same order: volatile, address space,
/// vector type, from-type, from-type width, address operand(s), chain.
/// NOTE(review): this listing omits many lines of the function (early
/// returns, the switch/case lines choosing between the i8..f64 opcodes, the
/// declarations of dl, Addr and Opcode, and the final return).
627 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
629 LoadSDNode *LD = cast<LoadSDNode>(N);
630 EVT LoadedVT = LD->getMemoryVT();
631 SDNode *NVPTXLD = nullptr;
633 // do not support pre/post inc/dec
637 if (!LoadedVT.isSimple())
640 // Address Space Setting
641 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
644 // - .volatile is only available for .global and .shared
645 bool isVolatile = LD->isVolatile();
// NOTE(review): the test below also lets GENERIC through, in addition to the
// two spaces named in the comment above; the statement it guards is not shown.
646 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
647 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
648 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector arity: Scalar unless the memory type is a vector, in which case V2 or
// V4 is chosen from the element count (the comparisons are not shown).
652 MVT SimpleVT = LoadedVT.getSimpleVT();
653 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
654 if (SimpleVT.isVector()) {
655 unsigned num = SimpleVT.getVectorNumElements();
657 vecType = NVPTX::PTXLdStInstCode::V2;
659 vecType = NVPTX::PTXLdStInstCode::V4;
664 // Type Setting: fromType + fromTypeWidth
666 // Sign : ISD::SEXTLOAD
667 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
669 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
670 MVT ScalarVT = SimpleVT.getScalarType();
671 // Read at least 8 bits (predicates are stored as 8-bit values)
672 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
673 unsigned int fromType;
// Sign-extending loads are Signed; otherwise floating-point scalars are Float
// and everything else is Unsigned.
674 if ((LD->getExtensionType() == ISD::SEXTLOAD))
675 fromType = NVPTX::PTXLdStInstCode::Signed;
676 else if (ScalarVT.isFloatingPoint())
677 fromType = NVPTX::PTXLdStInstCode::Float;
679 fromType = NVPTX::PTXLdStInstCode::Unsigned;
681 // Create the machine instruction DAG
682 SDValue Chain = N->getOperand(0);
683 SDValue N1 = N->getOperand(1);
685 SDValue Offset, Base;
687 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address.
689 if (SelectDirectAddr(N1, Addr)) {
692 Opcode = NVPTX::LD_i8_avar;
695 Opcode = NVPTX::LD_i16_avar;
698 Opcode = NVPTX::LD_i32_avar;
701 Opcode = NVPTX::LD_i64_avar;
704 Opcode = NVPTX::LD_f32_avar;
707 Opcode = NVPTX::LD_f64_avar;
712 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
713 getI32Imm(vecType), getI32Imm(fromType),
714 getI32Imm(fromTypeWidth), Addr, Chain };
715 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: base + offset matched by SelectADDRsi (64-bit matcher on 64-bit
// subtargets); the opcodes are shared between 32- and 64-bit.
716 } else if (Subtarget.is64Bit()
717 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
718 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
721 Opcode = NVPTX::LD_i8_asi;
724 Opcode = NVPTX::LD_i16_asi;
727 Opcode = NVPTX::LD_i32_asi;
730 Opcode = NVPTX::LD_i64_asi;
733 Opcode = NVPTX::LD_f32_asi;
736 Opcode = NVPTX::LD_f64_asi;
741 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
742 getI32Imm(vecType), getI32Imm(fromType),
743 getI32Imm(fromTypeWidth), Base, Offset, Chain };
744 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: base + offset matched by SelectADDRri; 64-bit subtargets use the
// _64 opcodes.
745 } else if (Subtarget.is64Bit()
746 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
747 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
748 if (Subtarget.is64Bit()) {
751 Opcode = NVPTX::LD_i8_ari_64;
754 Opcode = NVPTX::LD_i16_ari_64;
757 Opcode = NVPTX::LD_i32_ari_64;
760 Opcode = NVPTX::LD_i64_ari_64;
763 Opcode = NVPTX::LD_f32_ari_64;
766 Opcode = NVPTX::LD_f64_ari_64;
774 Opcode = NVPTX::LD_i8_ari;
777 Opcode = NVPTX::LD_i16_ari;
780 Opcode = NVPTX::LD_i32_ari;
783 Opcode = NVPTX::LD_i64_ari;
786 Opcode = NVPTX::LD_f32_ari;
789 Opcode = NVPTX::LD_f64_ari;
795 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
796 getI32Imm(vecType), getI32Imm(fromType),
797 getI32Imm(fromTypeWidth), Base, Offset, Chain };
798 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4 (fallback): the address operand N1 is used as is.
800 if (Subtarget.is64Bit()) {
803 Opcode = NVPTX::LD_i8_areg_64;
806 Opcode = NVPTX::LD_i16_areg_64;
809 Opcode = NVPTX::LD_i32_areg_64;
812 Opcode = NVPTX::LD_i64_areg_64;
815 Opcode = NVPTX::LD_f32_areg_64;
818 Opcode = NVPTX::LD_f64_areg_64;
826 Opcode = NVPTX::LD_i8_areg;
829 Opcode = NVPTX::LD_i16_areg;
832 Opcode = NVPTX::LD_i32_areg;
835 Opcode = NVPTX::LD_i64_areg;
838 Opcode = NVPTX::LD_f32_areg;
841 Opcode = NVPTX::LD_f64_areg;
847 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
848 getI32Imm(vecType), getI32Imm(fromType),
849 getI32Imm(fromTypeWidth), N1, Chain };
850 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Carry the original node's memory operand over to the new machine node.
854 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
855 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
856 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Custom selection of NVPTXISD::LoadV2 / NVPTXISD::LoadV4 nodes into NVPTX
/// LDV_* machine nodes.
///
/// The instruction modifiers are derived as for scalar loads, except that the
/// extension type comes from the node's last operand and the vector arity
/// comes from the node opcode. The opcode is then picked by addressing form,
/// tried in this order:
///   1. SelectDirectAddr succeeds            -> LDV_*_avar
///   2. SelectADDRsi / SelectADDRsi64        -> LDV_*_asi
///   3. SelectADDRri / SelectADDRri64        -> LDV_*_ari or LDV_*_ari_64
///   4. otherwise, Op1 itself is the address -> LDV_*_areg or LDV_*_areg_64
/// The new node reuses N's value-type list, and every form passes the
/// operands in the same order: volatile, address space, vector type,
/// from-type, from-type width, address operand(s), chain.
/// NOTE(review): this listing omits many lines of the function (early
/// returns, default cases, the case labels choosing the element type, the
/// declarations of DL, Opcode, LD and VecType, and the final return). Only
/// i8/i16/i32/f32 opcodes appear for LoadV4 in the visible lines.
862 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
864 SDValue Chain = N->getOperand(0);
865 SDValue Op1 = N->getOperand(1);
866 SDValue Addr, Offset, Base;
870 MemSDNode *MemSD = cast<MemSDNode>(N);
871 EVT LoadedVT = MemSD->getMemoryVT();
873 if (!LoadedVT.isSimple())
876 // Address Space Setting
877 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
880 // - .volatile is only available for .global and .shared
881 bool IsVolatile = MemSD->isVolatile();
// NOTE(review): the test below also lets GENERIC through, in addition to the
// two spaces named in the comment above; the statement it guards is not shown.
882 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
883 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
884 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
888 MVT SimpleVT = LoadedVT.getSimpleVT();
890 // Type Setting: fromType + fromTypeWidth
892 // Sign : ISD::SEXTLOAD
893 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
895 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
896 MVT ScalarVT = SimpleVT.getScalarType();
897 // Read at least 8 bits (predicates are stored as 8-bit values)
898 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
899 unsigned int FromType;
900 // The last operand holds the original LoadSDNode::getExtensionType() value
901 unsigned ExtensionType = cast<ConstantSDNode>(
902 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
// Sign-extending loads are Signed; otherwise floating-point scalars are Float
// and everything else is Unsigned.
903 if (ExtensionType == ISD::SEXTLOAD)
904 FromType = NVPTX::PTXLdStInstCode::Signed;
905 else if (ScalarVT.isFloatingPoint())
906 FromType = NVPTX::PTXLdStInstCode::Float;
908 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector arity follows the node opcode.
912 switch (N->getOpcode()) {
913 case NVPTXISD::LoadV2:
914 VecType = NVPTX::PTXLdStInstCode::V2;
916 case NVPTXISD::LoadV4:
917 VecType = NVPTX::PTXLdStInstCode::V4;
// The first result type of the node drives the per-element opcode choice.
923 EVT EltVT = N->getValueType(0);
// Form 1: direct address.
925 if (SelectDirectAddr(Op1, Addr)) {
926 switch (N->getOpcode()) {
929 case NVPTXISD::LoadV2:
930 switch (EltVT.getSimpleVT().SimpleTy) {
934 Opcode = NVPTX::LDV_i8_v2_avar;
937 Opcode = NVPTX::LDV_i16_v2_avar;
940 Opcode = NVPTX::LDV_i32_v2_avar;
943 Opcode = NVPTX::LDV_i64_v2_avar;
946 Opcode = NVPTX::LDV_f32_v2_avar;
949 Opcode = NVPTX::LDV_f64_v2_avar;
953 case NVPTXISD::LoadV4:
954 switch (EltVT.getSimpleVT().SimpleTy) {
958 Opcode = NVPTX::LDV_i8_v4_avar;
961 Opcode = NVPTX::LDV_i16_v4_avar;
964 Opcode = NVPTX::LDV_i32_v4_avar;
967 Opcode = NVPTX::LDV_f32_v4_avar;
973 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
974 getI32Imm(VecType), getI32Imm(FromType),
975 getI32Imm(FromTypeWidth), Addr, Chain };
976 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: base + offset matched by SelectADDRsi (64-bit matcher on 64-bit
// subtargets); the opcodes are shared between 32- and 64-bit.
977 } else if (Subtarget.is64Bit()
978 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
979 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
980 switch (N->getOpcode()) {
983 case NVPTXISD::LoadV2:
984 switch (EltVT.getSimpleVT().SimpleTy) {
988 Opcode = NVPTX::LDV_i8_v2_asi;
991 Opcode = NVPTX::LDV_i16_v2_asi;
994 Opcode = NVPTX::LDV_i32_v2_asi;
997 Opcode = NVPTX::LDV_i64_v2_asi;
1000 Opcode = NVPTX::LDV_f32_v2_asi;
1003 Opcode = NVPTX::LDV_f64_v2_asi;
1007 case NVPTXISD::LoadV4:
1008 switch (EltVT.getSimpleVT().SimpleTy) {
1012 Opcode = NVPTX::LDV_i8_v4_asi;
1015 Opcode = NVPTX::LDV_i16_v4_asi;
1018 Opcode = NVPTX::LDV_i32_v4_asi;
1021 Opcode = NVPTX::LDV_f32_v4_asi;
1027 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1028 getI32Imm(VecType), getI32Imm(FromType),
1029 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1030 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: base + offset matched by SelectADDRri; 64-bit subtargets use the
// _64 opcodes.
1031 } else if (Subtarget.is64Bit()
1032 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1033 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1034 if (Subtarget.is64Bit()) {
1035 switch (N->getOpcode()) {
1038 case NVPTXISD::LoadV2:
1039 switch (EltVT.getSimpleVT().SimpleTy) {
1043 Opcode = NVPTX::LDV_i8_v2_ari_64;
1046 Opcode = NVPTX::LDV_i16_v2_ari_64;
1049 Opcode = NVPTX::LDV_i32_v2_ari_64;
1052 Opcode = NVPTX::LDV_i64_v2_ari_64;
1055 Opcode = NVPTX::LDV_f32_v2_ari_64;
1058 Opcode = NVPTX::LDV_f64_v2_ari_64;
1062 case NVPTXISD::LoadV4:
1063 switch (EltVT.getSimpleVT().SimpleTy) {
1067 Opcode = NVPTX::LDV_i8_v4_ari_64;
1070 Opcode = NVPTX::LDV_i16_v4_ari_64;
1073 Opcode = NVPTX::LDV_i32_v4_ari_64;
1076 Opcode = NVPTX::LDV_f32_v4_ari_64;
1082 switch (N->getOpcode()) {
1085 case NVPTXISD::LoadV2:
1086 switch (EltVT.getSimpleVT().SimpleTy) {
1090 Opcode = NVPTX::LDV_i8_v2_ari;
1093 Opcode = NVPTX::LDV_i16_v2_ari;
1096 Opcode = NVPTX::LDV_i32_v2_ari;
1099 Opcode = NVPTX::LDV_i64_v2_ari;
1102 Opcode = NVPTX::LDV_f32_v2_ari;
1105 Opcode = NVPTX::LDV_f64_v2_ari;
1109 case NVPTXISD::LoadV4:
1110 switch (EltVT.getSimpleVT().SimpleTy) {
1114 Opcode = NVPTX::LDV_i8_v4_ari;
1117 Opcode = NVPTX::LDV_i16_v4_ari;
1120 Opcode = NVPTX::LDV_i32_v4_ari;
1123 Opcode = NVPTX::LDV_f32_v4_ari;
1130 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1131 getI32Imm(VecType), getI32Imm(FromType),
1132 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1134 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4 (fallback): the address operand Op1 is used as is.
1136 if (Subtarget.is64Bit()) {
1137 switch (N->getOpcode()) {
1140 case NVPTXISD::LoadV2:
1141 switch (EltVT.getSimpleVT().SimpleTy) {
1145 Opcode = NVPTX::LDV_i8_v2_areg_64;
1148 Opcode = NVPTX::LDV_i16_v2_areg_64;
1151 Opcode = NVPTX::LDV_i32_v2_areg_64;
1154 Opcode = NVPTX::LDV_i64_v2_areg_64;
1157 Opcode = NVPTX::LDV_f32_v2_areg_64;
1160 Opcode = NVPTX::LDV_f64_v2_areg_64;
1164 case NVPTXISD::LoadV4:
1165 switch (EltVT.getSimpleVT().SimpleTy) {
1169 Opcode = NVPTX::LDV_i8_v4_areg_64;
1172 Opcode = NVPTX::LDV_i16_v4_areg_64;
1175 Opcode = NVPTX::LDV_i32_v4_areg_64;
1178 Opcode = NVPTX::LDV_f32_v4_areg_64;
1184 switch (N->getOpcode()) {
1187 case NVPTXISD::LoadV2:
1188 switch (EltVT.getSimpleVT().SimpleTy) {
1192 Opcode = NVPTX::LDV_i8_v2_areg;
1195 Opcode = NVPTX::LDV_i16_v2_areg;
1198 Opcode = NVPTX::LDV_i32_v2_areg;
1201 Opcode = NVPTX::LDV_i64_v2_areg;
1204 Opcode = NVPTX::LDV_f32_v2_areg;
1207 Opcode = NVPTX::LDV_f64_v2_areg;
1211 case NVPTXISD::LoadV4:
1212 switch (EltVT.getSimpleVT().SimpleTy) {
1216 Opcode = NVPTX::LDV_i8_v4_areg;
1219 Opcode = NVPTX::LDV_i16_v4_areg;
1222 Opcode = NVPTX::LDV_i32_v4_areg;
1225 Opcode = NVPTX::LDV_f32_v4_areg;
1232 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1233 getI32Imm(VecType), getI32Imm(FromType),
1234 getI32Imm(FromTypeWidth), Op1, Chain };
1235 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the new machine node.
1238 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1239 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1240 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a machine node for an LDG/LDU load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node (one of the
/// nvvm_ldg_global_* / nvvm_ldu_global_* intrinsics) or an
/// NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4 node. The opcode is chosen from
/// three things: the node kind, the scalar element type of the memory VT, and
/// the addressing mode that matches the address operand (direct address,
/// register + immediate, or plain register; the latter two have separate
/// 32-bit and 64-bit opcode families). The new machine node reuses N's VT list
/// and gets N's memory operand attached.
1245 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1247 SDValue Chain = N->getOperand(0);
1252 // If this is an LDG intrinsic, the address is the third operand. If it's an
1253 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1254 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1255 Op1 = N->getOperand(2);
1256 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an INTRINSIC_W_CHAIN node holds the intrinsic ID.
1257 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1261 case Intrinsic::nvvm_ldg_global_f:
1262 case Intrinsic::nvvm_ldg_global_i:
1263 case Intrinsic::nvvm_ldg_global_p:
1266 case Intrinsic::nvvm_ldu_global_f:
1267 case Intrinsic::nvvm_ldu_global_i:
1268 case Intrinsic::nvvm_ldu_global_p:
1273 Op1 = N->getOperand(1);
1274 Mem = cast<MemSDNode>(N);
1280 SDValue Base, Offset, Addr;
// Opcode selection is keyed on the scalar element type, so strip the vector
// wrapper from the memory VT if there is one.
1282 EVT EltVT = Mem->getMemoryVT();
1283 if (EltVT.isVector()) {
1284 EltVT = EltVT.getVectorElementType();
// Addressing mode 1: direct address ("avar" opcodes); operands are
// { Addr, Chain }.
1287 if (SelectDirectAddr(Op1, Addr)) {
1288 switch (N->getOpcode()) {
1291 case ISD::INTRINSIC_W_CHAIN:
1293 switch (EltVT.getSimpleVT().SimpleTy) {
1297 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1300 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1303 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1306 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1309 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1312 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1316 switch (EltVT.getSimpleVT().SimpleTy) {
1320 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1323 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1326 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1329 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1332 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1335 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1340 case NVPTXISD::LDGV2:
1341 switch (EltVT.getSimpleVT().SimpleTy) {
1345 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1348 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1351 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1354 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1357 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1360 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1364 case NVPTXISD::LDUV2:
1365 switch (EltVT.getSimpleVT().SimpleTy) {
1369 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1372 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1375 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1378 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1381 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1384 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1388 case NVPTXISD::LDGV4:
1389 switch (EltVT.getSimpleVT().SimpleTy) {
1393 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1396 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1399 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1402 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1406 case NVPTXISD::LDUV4:
1407 switch (EltVT.getSimpleVT().SimpleTy) {
1411 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1414 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1417 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1420 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1426 SDValue Ops[] = { Addr, Chain };
1427 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 2: register + immediate ("ari" opcodes); operands are
// { Base, Offset, Chain }. The matcher and the opcode family both depend on
// whether the subtarget is 64-bit.
1428 } else if (Subtarget.is64Bit()
1429 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1430 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1431 if (Subtarget.is64Bit()) {
1432 switch (N->getOpcode()) {
1435 case ISD::INTRINSIC_W_CHAIN:
1437 switch (EltVT.getSimpleVT().SimpleTy) {
1441 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1444 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1447 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1450 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1453 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1456 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1460 switch (EltVT.getSimpleVT().SimpleTy) {
1464 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1467 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1470 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1473 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1476 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1479 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1484 case NVPTXISD::LDGV2:
1485 switch (EltVT.getSimpleVT().SimpleTy) {
1489 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1492 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1495 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1498 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1501 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1504 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1508 case NVPTXISD::LDUV2:
1509 switch (EltVT.getSimpleVT().SimpleTy) {
1513 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1516 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1519 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1522 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1525 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1528 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1532 case NVPTXISD::LDGV4:
1533 switch (EltVT.getSimpleVT().SimpleTy) {
1537 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1540 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1543 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1546 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1550 case NVPTXISD::LDUV4:
1551 switch (EltVT.getSimpleVT().SimpleTy) {
1555 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1558 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1561 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1564 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1570 switch (N->getOpcode()) {
1573 case ISD::INTRINSIC_W_CHAIN:
1575 switch (EltVT.getSimpleVT().SimpleTy) {
1579 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1582 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1585 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1588 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1591 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1594 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1598 switch (EltVT.getSimpleVT().SimpleTy) {
1602 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1605 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1608 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1611 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1614 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1617 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1622 case NVPTXISD::LDGV2:
1623 switch (EltVT.getSimpleVT().SimpleTy) {
1627 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1630 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1633 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1636 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1639 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1642 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1646 case NVPTXISD::LDUV2:
1647 switch (EltVT.getSimpleVT().SimpleTy) {
1651 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1654 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1657 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1660 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1663 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1666 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1670 case NVPTXISD::LDGV4:
1671 switch (EltVT.getSimpleVT().SimpleTy) {
1675 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1678 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1681 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1684 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1688 case NVPTXISD::LDUV4:
1689 switch (EltVT.getSimpleVT().SimpleTy) {
1693 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1696 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1699 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1702 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1709 SDValue Ops[] = { Base, Offset, Chain };
1711 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Addressing mode 3 (fallback): the address operand is used as-is ("areg"
// opcodes); operands are { Op1, Chain }.
1713 if (Subtarget.is64Bit()) {
1714 switch (N->getOpcode()) {
1717 case ISD::INTRINSIC_W_CHAIN:
1719 switch (EltVT.getSimpleVT().SimpleTy) {
1723 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1726 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1729 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1732 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1735 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1738 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1742 switch (EltVT.getSimpleVT().SimpleTy) {
1746 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1749 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1752 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1755 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1758 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1761 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1766 case NVPTXISD::LDGV2:
1767 switch (EltVT.getSimpleVT().SimpleTy) {
1771 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1774 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1777 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1780 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1783 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1786 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1790 case NVPTXISD::LDUV2:
1791 switch (EltVT.getSimpleVT().SimpleTy) {
1795 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1798 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1801 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1804 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1807 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1810 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1814 case NVPTXISD::LDGV4:
1815 switch (EltVT.getSimpleVT().SimpleTy) {
1819 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1822 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1825 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1828 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1832 case NVPTXISD::LDUV4:
1833 switch (EltVT.getSimpleVT().SimpleTy) {
1837 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1840 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1843 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1846 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1852 switch (N->getOpcode()) {
1855 case ISD::INTRINSIC_W_CHAIN:
1857 switch (EltVT.getSimpleVT().SimpleTy) {
1861 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1864 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1867 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1870 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1873 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1876 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1880 switch (EltVT.getSimpleVT().SimpleTy) {
1884 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1887 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1890 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1893 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1896 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1899 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1904 case NVPTXISD::LDGV2:
1905 switch (EltVT.getSimpleVT().SimpleTy) {
1909 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1912 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1915 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1918 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1921 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1924 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1928 case NVPTXISD::LDUV2:
1929 switch (EltVT.getSimpleVT().SimpleTy) {
1933 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1936 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1939 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1942 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1945 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1948 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1952 case NVPTXISD::LDGV4:
1953 switch (EltVT.getSimpleVT().SimpleTy) {
1957 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1960 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1963 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1966 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1970 case NVPTXISD::LDUV4:
1971 switch (EltVT.getSimpleVT().SimpleTy) {
1975 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1978 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1981 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1984 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1991 SDValue Ops[] = { Op1, Chain };
1992 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the new machine node.
1995 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1996 MemRefs0[0] = Mem->getMemOperand();
1997 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a machine store (ST_*) for the StoreSDNode \p N.
///
/// The store is encoded as: the value being stored, followed by five i32
/// immediates (volatile flag, code address space, vector type, destination
/// type, destination type width in bits), followed by the address operand(s)
/// and the chain. The opcode family depends on which addressing mode matches
/// the address operand: direct address ("avar"), symbol + immediate ("asi"),
/// register + immediate ("ari" / "ari_64"), or plain register
/// ("areg" / "areg_64"). The store's memory operand is attached to the new
/// machine node.
2002 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2004 StoreSDNode *ST = cast<StoreSDNode>(N);
2005 EVT StoreVT = ST->getMemoryVT();
2006 SDNode *NVPTXST = nullptr;
2008 // do not support pre/post inc/dec
2009 if (ST->isIndexed())
2012 if (!StoreVT.isSimple())
2015 // Address Space Setting
2016 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
2019 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below also lists GENERIC alongside GLOBAL and
// SHARED, which the comment above does not mention -- confirm which is
// intended.
2020 bool isVolatile = ST->isVolatile();
2021 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2022 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2023 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector type setting: Scalar unless the stored VT is a vector, in which case
// it is derived from the number of vector elements.
2027 MVT SimpleVT = StoreVT.getSimpleVT();
2028 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2029 if (SimpleVT.isVector()) {
2030 unsigned num = SimpleVT.getVectorNumElements();
2032 vecType = NVPTX::PTXLdStInstCode::V2;
2034 vecType = NVPTX::PTXLdStInstCode::V4;
2039 // Type Setting: toType + toTypeWidth
2040 // - for integer type, always use 'u'
2042 MVT ScalarVT = SimpleVT.getScalarType();
2043 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2044 unsigned int toType;
2045 if (ScalarVT.isFloatingPoint())
2046 toType = NVPTX::PTXLdStInstCode::Float;
2048 toType = NVPTX::PTXLdStInstCode::Unsigned;
2050 // Create the machine instruction DAG
// Operand layout of N: 0 = chain, 1 = value to store (N1), 2 = address (N2).
2051 SDValue Chain = N->getOperand(0);
2052 SDValue N1 = N->getOperand(1);
2053 SDValue N2 = N->getOperand(2);
2055 SDValue Offset, Base;
// SourceVT is the simple type of the value being stored (result 0 of N1's
// node).
2057 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Addressing mode 1: direct address.
2059 if (SelectDirectAddr(N2, Addr)) {
2062 Opcode = NVPTX::ST_i8_avar;
2065 Opcode = NVPTX::ST_i16_avar;
2068 Opcode = NVPTX::ST_i32_avar;
2071 Opcode = NVPTX::ST_i64_avar;
2074 Opcode = NVPTX::ST_f32_avar;
2077 Opcode = NVPTX::ST_f64_avar;
2082 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2083 getI32Imm(vecType), getI32Imm(toType),
2084 getI32Imm(toTypeWidth), Addr, Chain };
2085 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 2: symbol + immediate. The same "asi" opcodes are used for
// both the 32-bit and 64-bit matchers.
2086 } else if (Subtarget.is64Bit()
2087 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2088 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2091 Opcode = NVPTX::ST_i8_asi;
2094 Opcode = NVPTX::ST_i16_asi;
2097 Opcode = NVPTX::ST_i32_asi;
2100 Opcode = NVPTX::ST_i64_asi;
2103 Opcode = NVPTX::ST_f32_asi;
2106 Opcode = NVPTX::ST_f64_asi;
2111 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2112 getI32Imm(vecType), getI32Imm(toType),
2113 getI32Imm(toTypeWidth), Base, Offset, Chain };
2114 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 3: register + immediate, with separate 64-bit ("ari_64")
// and 32-bit ("ari") opcode families.
2115 } else if (Subtarget.is64Bit()
2116 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2117 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2118 if (Subtarget.is64Bit()) {
2121 Opcode = NVPTX::ST_i8_ari_64;
2124 Opcode = NVPTX::ST_i16_ari_64;
2127 Opcode = NVPTX::ST_i32_ari_64;
2130 Opcode = NVPTX::ST_i64_ari_64;
2133 Opcode = NVPTX::ST_f32_ari_64;
2136 Opcode = NVPTX::ST_f64_ari_64;
2144 Opcode = NVPTX::ST_i8_ari;
2147 Opcode = NVPTX::ST_i16_ari;
2150 Opcode = NVPTX::ST_i32_ari;
2153 Opcode = NVPTX::ST_i64_ari;
2156 Opcode = NVPTX::ST_f32_ari;
2159 Opcode = NVPTX::ST_f64_ari;
2165 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2166 getI32Imm(vecType), getI32Imm(toType),
2167 getI32Imm(toTypeWidth), Base, Offset, Chain };
2168 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Addressing mode 4 (fallback): the address operand N2 is passed through
// unchanged ("areg" / "areg_64" opcodes).
2170 if (Subtarget.is64Bit()) {
2173 Opcode = NVPTX::ST_i8_areg_64;
2176 Opcode = NVPTX::ST_i16_areg_64;
2179 Opcode = NVPTX::ST_i32_areg_64;
2182 Opcode = NVPTX::ST_i64_areg_64;
2185 Opcode = NVPTX::ST_f32_areg_64;
2188 Opcode = NVPTX::ST_f64_areg_64;
2196 Opcode = NVPTX::ST_i8_areg;
2199 Opcode = NVPTX::ST_i16_areg;
2202 Opcode = NVPTX::ST_i32_areg;
2205 Opcode = NVPTX::ST_i64_areg;
2208 Opcode = NVPTX::ST_f32_areg;
2211 Opcode = NVPTX::ST_f64_areg;
2217 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2218 getI32Imm(vecType), getI32Imm(toType),
2219 getI32Imm(toTypeWidth), N2, Chain };
2220 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the original store's memory operand over to the new machine node.
2224 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2225 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2226 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a machine vector store (STV_*) for an NVPTXISD::StoreV2 or
/// NVPTXISD::StoreV4 node \p N.
///
/// The operand list is built up in StOps: the 2 or 4 element values, then
/// five i32 immediates (volatile flag, code address space, vector type,
/// destination type, destination type width in bits), then the address
/// operand(s) for whichever addressing mode matched, then the chain. Stores
/// through a pointer into the constant address space are rejected with a
/// fatal error. The node's memory operand is attached to the new machine node.
2232 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2233 SDValue Chain = N->getOperand(0);
2234 SDValue Op1 = N->getOperand(1);
2235 SDValue Addr, Offset, Base;
// The opcode is keyed on the value type of the first stored element.
2239 EVT EltVT = Op1.getValueType();
2240 MemSDNode *MemSD = cast<MemSDNode>(N);
2241 EVT StoreVT = MemSD->getMemoryVT();
2243 // Address Space Setting
2244 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2246 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2247 report_fatal_error("Cannot store to pointer that points to constant "
2252 // - .volatile is only available for .global and .shared
// NOTE(review): the condition below also lists GENERIC alongside GLOBAL and
// SHARED, which the comment above does not mention -- confirm which is
// intended.
2253 bool IsVolatile = MemSD->isVolatile();
2254 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2255 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2256 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2259 // Type Setting: toType + toTypeWidth
2260 // - for integer type, always use 'u'
2261 assert(StoreVT.isSimple() && "Store value is not simple");
2262 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2263 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2265 if (ScalarVT.isFloatingPoint())
2266 ToType = NVPTX::PTXLdStInstCode::Float;
2268 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2270 SmallVector<SDValue, 12> StOps;
// Collect the element values and locate the address operand (N2):
//   StoreV2: operands 1-2 are the values, operand 3 is the address.
//   StoreV4: operands 1-4 are the values, operand 5 is the address.
2274 switch (N->getOpcode()) {
2275 case NVPTXISD::StoreV2:
2276 VecType = NVPTX::PTXLdStInstCode::V2;
2277 StOps.push_back(N->getOperand(1));
2278 StOps.push_back(N->getOperand(2));
2279 N2 = N->getOperand(3);
2281 case NVPTXISD::StoreV4:
2282 VecType = NVPTX::PTXLdStInstCode::V4;
2283 StOps.push_back(N->getOperand(1));
2284 StOps.push_back(N->getOperand(2));
2285 StOps.push_back(N->getOperand(3));
2286 StOps.push_back(N->getOperand(4));
2287 N2 = N->getOperand(5);
2293 StOps.push_back(getI32Imm(IsVolatile));
2294 StOps.push_back(getI32Imm(CodeAddrSpace));
2295 StOps.push_back(getI32Imm(VecType));
2296 StOps.push_back(getI32Imm(ToType));
2297 StOps.push_back(getI32Imm(ToTypeWidth));
// Addressing mode 1: direct address ("avar" opcodes).
2299 if (SelectDirectAddr(N2, Addr)) {
2300 switch (N->getOpcode()) {
2303 case NVPTXISD::StoreV2:
2304 switch (EltVT.getSimpleVT().SimpleTy) {
2308 Opcode = NVPTX::STV_i8_v2_avar;
2311 Opcode = NVPTX::STV_i16_v2_avar;
2314 Opcode = NVPTX::STV_i32_v2_avar;
2317 Opcode = NVPTX::STV_i64_v2_avar;
2320 Opcode = NVPTX::STV_f32_v2_avar;
2323 Opcode = NVPTX::STV_f64_v2_avar;
2327 case NVPTXISD::StoreV4:
2328 switch (EltVT.getSimpleVT().SimpleTy) {
2332 Opcode = NVPTX::STV_i8_v4_avar;
2335 Opcode = NVPTX::STV_i16_v4_avar;
2338 Opcode = NVPTX::STV_i32_v4_avar;
2341 Opcode = NVPTX::STV_f32_v4_avar;
2346 StOps.push_back(Addr);
// Addressing mode 2: symbol + immediate ("asi" opcodes, shared by the 32-bit
// and 64-bit matchers).
2347 } else if (Subtarget.is64Bit()
2348 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2349 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2350 switch (N->getOpcode()) {
2353 case NVPTXISD::StoreV2:
2354 switch (EltVT.getSimpleVT().SimpleTy) {
2358 Opcode = NVPTX::STV_i8_v2_asi;
2361 Opcode = NVPTX::STV_i16_v2_asi;
2364 Opcode = NVPTX::STV_i32_v2_asi;
2367 Opcode = NVPTX::STV_i64_v2_asi;
2370 Opcode = NVPTX::STV_f32_v2_asi;
2373 Opcode = NVPTX::STV_f64_v2_asi;
2377 case NVPTXISD::StoreV4:
2378 switch (EltVT.getSimpleVT().SimpleTy) {
2382 Opcode = NVPTX::STV_i8_v4_asi;
2385 Opcode = NVPTX::STV_i16_v4_asi;
2388 Opcode = NVPTX::STV_i32_v4_asi;
2391 Opcode = NVPTX::STV_f32_v4_asi;
2396 StOps.push_back(Base);
2397 StOps.push_back(Offset);
// Addressing mode 3: register + immediate ("ari_64" on 64-bit subtargets,
// "ari" otherwise).
2398 } else if (Subtarget.is64Bit()
2399 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2400 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2401 if (Subtarget.is64Bit()) {
2402 switch (N->getOpcode()) {
2405 case NVPTXISD::StoreV2:
2406 switch (EltVT.getSimpleVT().SimpleTy) {
2410 Opcode = NVPTX::STV_i8_v2_ari_64;
2413 Opcode = NVPTX::STV_i16_v2_ari_64;
2416 Opcode = NVPTX::STV_i32_v2_ari_64;
2419 Opcode = NVPTX::STV_i64_v2_ari_64;
2422 Opcode = NVPTX::STV_f32_v2_ari_64;
2425 Opcode = NVPTX::STV_f64_v2_ari_64;
2429 case NVPTXISD::StoreV4:
2430 switch (EltVT.getSimpleVT().SimpleTy) {
2434 Opcode = NVPTX::STV_i8_v4_ari_64;
2437 Opcode = NVPTX::STV_i16_v4_ari_64;
2440 Opcode = NVPTX::STV_i32_v4_ari_64;
2443 Opcode = NVPTX::STV_f32_v4_ari_64;
2449 switch (N->getOpcode()) {
2452 case NVPTXISD::StoreV2:
2453 switch (EltVT.getSimpleVT().SimpleTy) {
2457 Opcode = NVPTX::STV_i8_v2_ari;
2460 Opcode = NVPTX::STV_i16_v2_ari;
2463 Opcode = NVPTX::STV_i32_v2_ari;
2466 Opcode = NVPTX::STV_i64_v2_ari;
2469 Opcode = NVPTX::STV_f32_v2_ari;
2472 Opcode = NVPTX::STV_f64_v2_ari;
2476 case NVPTXISD::StoreV4:
2477 switch (EltVT.getSimpleVT().SimpleTy) {
2481 Opcode = NVPTX::STV_i8_v4_ari;
2484 Opcode = NVPTX::STV_i16_v4_ari;
2487 Opcode = NVPTX::STV_i32_v4_ari;
2490 Opcode = NVPTX::STV_f32_v4_ari;
2496 StOps.push_back(Base);
2497 StOps.push_back(Offset);
// Addressing mode 4 (fallback): the address operand N2 is pushed unchanged
// ("areg_64" on 64-bit subtargets, "areg" otherwise).
2499 if (Subtarget.is64Bit()) {
2500 switch (N->getOpcode()) {
2503 case NVPTXISD::StoreV2:
2504 switch (EltVT.getSimpleVT().SimpleTy) {
2508 Opcode = NVPTX::STV_i8_v2_areg_64;
2511 Opcode = NVPTX::STV_i16_v2_areg_64;
2514 Opcode = NVPTX::STV_i32_v2_areg_64;
2517 Opcode = NVPTX::STV_i64_v2_areg_64;
2520 Opcode = NVPTX::STV_f32_v2_areg_64;
2523 Opcode = NVPTX::STV_f64_v2_areg_64;
2527 case NVPTXISD::StoreV4:
2528 switch (EltVT.getSimpleVT().SimpleTy) {
2532 Opcode = NVPTX::STV_i8_v4_areg_64;
2535 Opcode = NVPTX::STV_i16_v4_areg_64;
2538 Opcode = NVPTX::STV_i32_v4_areg_64;
2541 Opcode = NVPTX::STV_f32_v4_areg_64;
2547 switch (N->getOpcode()) {
2550 case NVPTXISD::StoreV2:
2551 switch (EltVT.getSimpleVT().SimpleTy) {
2555 Opcode = NVPTX::STV_i8_v2_areg;
2558 Opcode = NVPTX::STV_i16_v2_areg;
2561 Opcode = NVPTX::STV_i32_v2_areg;
2564 Opcode = NVPTX::STV_i64_v2_areg;
2567 Opcode = NVPTX::STV_f32_v2_areg;
2570 Opcode = NVPTX::STV_f64_v2_areg;
2574 case NVPTXISD::StoreV4:
2575 switch (EltVT.getSimpleVT().SimpleTy) {
2579 Opcode = NVPTX::STV_i8_v4_areg;
2582 Opcode = NVPTX::STV_i16_v4_areg;
2585 Opcode = NVPTX::STV_i32_v4_areg;
2588 Opcode = NVPTX::STV_f32_v4_areg;
2594 StOps.push_back(N2);
// The chain is always the final operand.
2597 StOps.push_back(Chain);
2599 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the original node's memory operand over to the new machine node.
2601 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2602 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2603 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a LoadParamMem* machine node for an NVPTXISD::LoadParam,
/// LoadParamV2 or LoadParamV4 node.
///
/// The opcode is chosen from the node kind and the node's memory VT. The
/// machine node's operands are the offset (re-emitted as an i32 target
/// constant), the chain and the incoming glue. Its results are one, two or
/// four values of the node's first result type, followed by a chain
/// (MVT::Other) and glue.
2608 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
// Operand layout read here: 0 = chain, 2 = offset, 3 = glue.
2609 SDValue Chain = Node->getOperand(0);
2610 SDValue Offset = Node->getOperand(2);
2611 SDValue Flag = Node->getOperand(3);
2613 MemSDNode *Mem = cast<MemSDNode>(Node);
2616 switch (Node->getOpcode()) {
2619 case NVPTXISD::LoadParam:
2622 case NVPTXISD::LoadParamV2:
2625 case NVPTXISD::LoadParamV4:
2630 EVT EltVT = Node->getValueType(0);
2631 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes. NOTE(review): two memory types map to LoadParamMemI8 --
// presumably i1 and i8; confirm against the case labels.
2639 switch (MemVT.getSimpleVT().SimpleTy) {
2643 Opc = NVPTX::LoadParamMemI8;
2646 Opc = NVPTX::LoadParamMemI8;
2649 Opc = NVPTX::LoadParamMemI16;
2652 Opc = NVPTX::LoadParamMemI32;
2655 Opc = NVPTX::LoadParamMemI64;
2658 Opc = NVPTX::LoadParamMemF32;
2661 Opc = NVPTX::LoadParamMemF64;
// Two-element vector opcodes.
2666 switch (MemVT.getSimpleVT().SimpleTy) {
2670 Opc = NVPTX::LoadParamMemV2I8;
2673 Opc = NVPTX::LoadParamMemV2I8;
2676 Opc = NVPTX::LoadParamMemV2I16;
2679 Opc = NVPTX::LoadParamMemV2I32;
2682 Opc = NVPTX::LoadParamMemV2I64;
2685 Opc = NVPTX::LoadParamMemV2F32;
2688 Opc = NVPTX::LoadParamMemV2F64;
// Four-element vector opcodes (no I64/F64 variants appear here).
2693 switch (MemVT.getSimpleVT().SimpleTy) {
2697 Opc = NVPTX::LoadParamMemV4I8;
2700 Opc = NVPTX::LoadParamMemV4I8;
2703 Opc = NVPTX::LoadParamMemV4I16;
2706 Opc = NVPTX::LoadParamMemV4I32;
2709 Opc = NVPTX::LoadParamMemV4F32;
// Result list: one EltVT per loaded element, then chain and glue.
2717 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2718 } else if (VecSize == 2) {
2719 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2721 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2722 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a ConstantSDNode (cast<> asserts otherwise); it
// is re-emitted as an i32 target constant.
2725 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2727 SmallVector<SDValue, 2> Ops;
2728 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2729 Ops.push_back(Chain);
2730 Ops.push_back(Flag);
2733 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// Select a StoreRetval* machine node for an NVPTXISD::StoreRetval,
/// StoreRetvalV2 or StoreRetvalV4 node.
///
/// The machine node's operands are the NumElts values being stored (taken
/// from operand 2 onward), the offset as an i32 target constant, and the
/// chain. The opcode is chosen from the node kind and the node's memory VT.
/// The node's memory operand is attached to the new machine node, whose only
/// result is a chain (MVT::Other).
2737 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
// Operand layout read here: 0 = chain, 1 = offset (must be a
// ConstantSDNode), 2.. = values.
2739 SDValue Chain = N->getOperand(0);
2740 SDValue Offset = N->getOperand(1);
2741 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2742 MemSDNode *Mem = cast<MemSDNode>(N);
2744 // How many elements do we have?
2745 unsigned NumElts = 1;
2746 switch (N->getOpcode()) {
2749 case NVPTXISD::StoreRetval:
2752 case NVPTXISD::StoreRetvalV2:
2755 case NVPTXISD::StoreRetvalV4:
2760 // Build vector of operands
2761 SmallVector<SDValue, 6> Ops;
2762 for (unsigned i = 0; i < NumElts; ++i)
2763 Ops.push_back(N->getOperand(i + 2));
2764 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2765 Ops.push_back(Chain);
2767 // Determine target opcode
2768 // If we have an i1, use an 8-bit store. The lowering code in
2769 // NVPTXISelLowering will have already emitted an upcast.
2770 unsigned Opcode = 0;
// Scalar opcodes.
2775 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2779 Opcode = NVPTX::StoreRetvalI8;
2782 Opcode = NVPTX::StoreRetvalI8;
2785 Opcode = NVPTX::StoreRetvalI16;
2788 Opcode = NVPTX::StoreRetvalI32;
2791 Opcode = NVPTX::StoreRetvalI64;
2794 Opcode = NVPTX::StoreRetvalF32;
2797 Opcode = NVPTX::StoreRetvalF64;
// Two-element vector opcodes.
2802 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2806 Opcode = NVPTX::StoreRetvalV2I8;
2809 Opcode = NVPTX::StoreRetvalV2I8;
2812 Opcode = NVPTX::StoreRetvalV2I16;
2815 Opcode = NVPTX::StoreRetvalV2I32;
2818 Opcode = NVPTX::StoreRetvalV2I64;
2821 Opcode = NVPTX::StoreRetvalV2F32;
2824 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element vector opcodes (no I64/F64 variants appear here).
2829 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2833 Opcode = NVPTX::StoreRetvalV4I8;
2836 Opcode = NVPTX::StoreRetvalV4I8;
2839 Opcode = NVPTX::StoreRetvalV4I16;
2842 Opcode = NVPTX::StoreRetvalV4I32;
2845 Opcode = NVPTX::StoreRetvalV4F32;
2852 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the original node's memory operand over to the new machine node.
2853 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2854 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2855 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a StoreParam* machine node for an NVPTXISD::StoreParam,
/// StoreParamV2, StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// The machine node's operands are the NumElts values being stored (taken
/// from operand 3 onward), the parameter index and offset as i32 target
/// constants, the chain, and the incoming glue (N's last operand). For
/// StoreParamU32 / StoreParamS32 the value operand is first passed through a
/// CVT_u32_u16 / CVT_s32_s16 machine node and stored with StoreParamI32. The
/// node's memory operand is attached to the result, which produces a chain
/// and glue.
2860 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// Operand layout read here: 0 = chain, 1 = parameter index, 2 = offset (both
// must be ConstantSDNodes), 3.. = values, last = glue.
2862 SDValue Chain = N->getOperand(0);
2863 SDValue Param = N->getOperand(1);
2864 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2865 SDValue Offset = N->getOperand(2);
2866 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2867 MemSDNode *Mem = cast<MemSDNode>(N);
2868 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2870 // How many elements do we have?
2871 unsigned NumElts = 1;
2872 switch (N->getOpcode()) {
2875 case NVPTXISD::StoreParamU32:
2876 case NVPTXISD::StoreParamS32:
2877 case NVPTXISD::StoreParam:
2880 case NVPTXISD::StoreParamV2:
2883 case NVPTXISD::StoreParamV4:
2888 // Build vector of operands
2889 SmallVector<SDValue, 8> Ops;
2890 for (unsigned i = 0; i < NumElts; ++i)
2891 Ops.push_back(N->getOperand(i + 3));
2892 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2893 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2894 Ops.push_back(Chain);
2895 Ops.push_back(Flag);
2897 // Determine target opcode
2898 // If we have an i1, use an 8-bit store. The lowering code in
2899 // NVPTXISelLowering will have already emitted an upcast.
2900 unsigned Opcode = 0;
2901 switch (N->getOpcode()) {
// Scalar opcodes.
2907 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2911 Opcode = NVPTX::StoreParamI8;
2914 Opcode = NVPTX::StoreParamI8;
2917 Opcode = NVPTX::StoreParamI16;
2920 Opcode = NVPTX::StoreParamI32;
2923 Opcode = NVPTX::StoreParamI64;
2926 Opcode = NVPTX::StoreParamF32;
2929 Opcode = NVPTX::StoreParamF64;
// Two-element vector opcodes.
2934 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2938 Opcode = NVPTX::StoreParamV2I8;
2941 Opcode = NVPTX::StoreParamV2I8;
2944 Opcode = NVPTX::StoreParamV2I16;
2947 Opcode = NVPTX::StoreParamV2I32;
2950 Opcode = NVPTX::StoreParamV2I64;
2953 Opcode = NVPTX::StoreParamV2F32;
2956 Opcode = NVPTX::StoreParamV2F64;
// Four-element vector opcodes (no I64/F64 variants appear here).
2961 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2965 Opcode = NVPTX::StoreParamV4I8;
2968 Opcode = NVPTX::StoreParamV4I8;
2971 Opcode = NVPTX::StoreParamV4I16;
2974 Opcode = NVPTX::StoreParamV4I32;
2977 Opcode = NVPTX::StoreParamV4F32;
2983 // Special case: if we have a sign-extend/zero-extend node, insert the
2984 // conversion instruction first, and use that as the value operand to
2985 // the selected StoreParam node.
2986 case NVPTXISD::StoreParamU32: {
2987 Opcode = NVPTX::StoreParamI32;
2988 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2990 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2991 MVT::i32, Ops[0], CvtNone);
// Replace the value operand with the converted i32 value.
2992 Ops[0] = SDValue(Cvt, 0);
2995 case NVPTXISD::StoreParamS32: {
2996 Opcode = NVPTX::StoreParamI32;
2997 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2999 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3000 MVT::i32, Ops[0], CvtNone);
// Replace the value operand with the converted i32 value.
3001 Ops[0] = SDValue(Cvt, 0);
// The selected node yields a chain and glue.
3006 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3008 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the original node's memory operand over to the new machine node.
3009 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3010 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3011 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3016 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3017 SDValue Chain = N->getOperand(0);
3018 SDNode *Ret = nullptr;
3020 SmallVector<SDValue, 8> Ops;
3022 switch (N->getOpcode()) {
3023 default: return nullptr;
3024 case NVPTXISD::Tex1DFloatS32:
3025 Opc = NVPTX::TEX_1D_F32_S32;
3027 case NVPTXISD::Tex1DFloatFloat:
3028 Opc = NVPTX::TEX_1D_F32_F32;
3030 case NVPTXISD::Tex1DFloatFloatLevel:
3031 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3033 case NVPTXISD::Tex1DFloatFloatGrad:
3034 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3036 case NVPTXISD::Tex1DS32S32:
3037 Opc = NVPTX::TEX_1D_S32_S32;
3039 case NVPTXISD::Tex1DS32Float:
3040 Opc = NVPTX::TEX_1D_S32_F32;
3042 case NVPTXISD::Tex1DS32FloatLevel:
3043 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3045 case NVPTXISD::Tex1DS32FloatGrad:
3046 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3048 case NVPTXISD::Tex1DU32S32:
3049 Opc = NVPTX::TEX_1D_U32_S32;
3051 case NVPTXISD::Tex1DU32Float:
3052 Opc = NVPTX::TEX_1D_U32_F32;
3054 case NVPTXISD::Tex1DU32FloatLevel:
3055 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3057 case NVPTXISD::Tex1DU32FloatGrad:
3058 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3060 case NVPTXISD::Tex1DArrayFloatS32:
3061 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3063 case NVPTXISD::Tex1DArrayFloatFloat:
3064 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3066 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3067 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3069 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3070 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3072 case NVPTXISD::Tex1DArrayS32S32:
3073 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3075 case NVPTXISD::Tex1DArrayS32Float:
3076 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3078 case NVPTXISD::Tex1DArrayS32FloatLevel:
3079 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3081 case NVPTXISD::Tex1DArrayS32FloatGrad:
3082 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3084 case NVPTXISD::Tex1DArrayU32S32:
3085 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3087 case NVPTXISD::Tex1DArrayU32Float:
3088 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3090 case NVPTXISD::Tex1DArrayU32FloatLevel:
3091 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3093 case NVPTXISD::Tex1DArrayU32FloatGrad:
3094 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3096 case NVPTXISD::Tex2DFloatS32:
3097 Opc = NVPTX::TEX_2D_F32_S32;
3099 case NVPTXISD::Tex2DFloatFloat:
3100 Opc = NVPTX::TEX_2D_F32_F32;
3102 case NVPTXISD::Tex2DFloatFloatLevel:
3103 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3105 case NVPTXISD::Tex2DFloatFloatGrad:
3106 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3108 case NVPTXISD::Tex2DS32S32:
3109 Opc = NVPTX::TEX_2D_S32_S32;
3111 case NVPTXISD::Tex2DS32Float:
3112 Opc = NVPTX::TEX_2D_S32_F32;
3114 case NVPTXISD::Tex2DS32FloatLevel:
3115 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3117 case NVPTXISD::Tex2DS32FloatGrad:
3118 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3120 case NVPTXISD::Tex2DU32S32:
3121 Opc = NVPTX::TEX_2D_U32_S32;
3123 case NVPTXISD::Tex2DU32Float:
3124 Opc = NVPTX::TEX_2D_U32_F32;
3126 case NVPTXISD::Tex2DU32FloatLevel:
3127 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3129 case NVPTXISD::Tex2DU32FloatGrad:
3130 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3132 case NVPTXISD::Tex2DArrayFloatS32:
3133 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3135 case NVPTXISD::Tex2DArrayFloatFloat:
3136 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3138 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3139 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3141 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3142 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3144 case NVPTXISD::Tex2DArrayS32S32:
3145 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3147 case NVPTXISD::Tex2DArrayS32Float:
3148 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3150 case NVPTXISD::Tex2DArrayS32FloatLevel:
3151 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3153 case NVPTXISD::Tex2DArrayS32FloatGrad:
3154 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3156 case NVPTXISD::Tex2DArrayU32S32:
3157 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3159 case NVPTXISD::Tex2DArrayU32Float:
3160 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3162 case NVPTXISD::Tex2DArrayU32FloatLevel:
3163 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3165 case NVPTXISD::Tex2DArrayU32FloatGrad:
3166 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3168 case NVPTXISD::Tex3DFloatS32:
3169 Opc = NVPTX::TEX_3D_F32_S32;
3171 case NVPTXISD::Tex3DFloatFloat:
3172 Opc = NVPTX::TEX_3D_F32_F32;
3174 case NVPTXISD::Tex3DFloatFloatLevel:
3175 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3177 case NVPTXISD::Tex3DFloatFloatGrad:
3178 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3180 case NVPTXISD::Tex3DS32S32:
3181 Opc = NVPTX::TEX_3D_S32_S32;
3183 case NVPTXISD::Tex3DS32Float:
3184 Opc = NVPTX::TEX_3D_S32_F32;
3186 case NVPTXISD::Tex3DS32FloatLevel:
3187 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3189 case NVPTXISD::Tex3DS32FloatGrad:
3190 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3192 case NVPTXISD::Tex3DU32S32:
3193 Opc = NVPTX::TEX_3D_U32_S32;
3195 case NVPTXISD::Tex3DU32Float:
3196 Opc = NVPTX::TEX_3D_U32_F32;
3198 case NVPTXISD::Tex3DU32FloatLevel:
3199 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3201 case NVPTXISD::Tex3DU32FloatGrad:
3202 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3204 case NVPTXISD::TexCubeFloatFloat:
3205 Opc = NVPTX::TEX_CUBE_F32_F32;
3207 case NVPTXISD::TexCubeFloatFloatLevel:
3208 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3210 case NVPTXISD::TexCubeS32Float:
3211 Opc = NVPTX::TEX_CUBE_S32_F32;
3213 case NVPTXISD::TexCubeS32FloatLevel:
3214 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3216 case NVPTXISD::TexCubeU32Float:
3217 Opc = NVPTX::TEX_CUBE_U32_F32;
3219 case NVPTXISD::TexCubeU32FloatLevel:
3220 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3222 case NVPTXISD::TexCubeArrayFloatFloat:
3223 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3225 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3226 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3228 case NVPTXISD::TexCubeArrayS32Float:
3229 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3231 case NVPTXISD::TexCubeArrayS32FloatLevel:
3232 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3234 case NVPTXISD::TexCubeArrayU32Float:
3235 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3237 case NVPTXISD::TexCubeArrayU32FloatLevel:
3238 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3240 case NVPTXISD::Tld4R2DFloatFloat:
3241 Opc = NVPTX::TLD4_R_2D_F32_F32;
3243 case NVPTXISD::Tld4G2DFloatFloat:
3244 Opc = NVPTX::TLD4_G_2D_F32_F32;
3246 case NVPTXISD::Tld4B2DFloatFloat:
3247 Opc = NVPTX::TLD4_B_2D_F32_F32;
3249 case NVPTXISD::Tld4A2DFloatFloat:
3250 Opc = NVPTX::TLD4_A_2D_F32_F32;
3252 case NVPTXISD::Tld4R2DS64Float:
3253 Opc = NVPTX::TLD4_R_2D_S32_F32;
3255 case NVPTXISD::Tld4G2DS64Float:
3256 Opc = NVPTX::TLD4_G_2D_S32_F32;
3258 case NVPTXISD::Tld4B2DS64Float:
3259 Opc = NVPTX::TLD4_B_2D_S32_F32;
3261 case NVPTXISD::Tld4A2DS64Float:
3262 Opc = NVPTX::TLD4_A_2D_S32_F32;
3264 case NVPTXISD::Tld4R2DU64Float:
3265 Opc = NVPTX::TLD4_R_2D_U32_F32;
3267 case NVPTXISD::Tld4G2DU64Float:
3268 Opc = NVPTX::TLD4_G_2D_U32_F32;
3270 case NVPTXISD::Tld4B2DU64Float:
3271 Opc = NVPTX::TLD4_B_2D_U32_F32;
3273 case NVPTXISD::Tld4A2DU64Float:
3274 Opc = NVPTX::TLD4_A_2D_U32_F32;
3276 case NVPTXISD::TexUnified1DFloatS32:
3277 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3279 case NVPTXISD::TexUnified1DFloatFloat:
3280 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3282 case NVPTXISD::TexUnified1DFloatFloatLevel:
3283 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3285 case NVPTXISD::TexUnified1DFloatFloatGrad:
3286 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3288 case NVPTXISD::TexUnified1DS32S32:
3289 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3291 case NVPTXISD::TexUnified1DS32Float:
3292 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3294 case NVPTXISD::TexUnified1DS32FloatLevel:
3295 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3297 case NVPTXISD::TexUnified1DS32FloatGrad:
3298 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3300 case NVPTXISD::TexUnified1DU32S32:
3301 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3303 case NVPTXISD::TexUnified1DU32Float:
3304 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3306 case NVPTXISD::TexUnified1DU32FloatLevel:
3307 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3309 case NVPTXISD::TexUnified1DU32FloatGrad:
3310 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3312 case NVPTXISD::TexUnified1DArrayFloatS32:
3313 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3315 case NVPTXISD::TexUnified1DArrayFloatFloat:
3316 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3318 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3319 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3321 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3322 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3324 case NVPTXISD::TexUnified1DArrayS32S32:
3325 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3327 case NVPTXISD::TexUnified1DArrayS32Float:
3328 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3330 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3331 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3333 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3334 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3336 case NVPTXISD::TexUnified1DArrayU32S32:
3337 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3339 case NVPTXISD::TexUnified1DArrayU32Float:
3340 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3342 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3343 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3345 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3346 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3348 case NVPTXISD::TexUnified2DFloatS32:
3349 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3351 case NVPTXISD::TexUnified2DFloatFloat:
3352 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3354 case NVPTXISD::TexUnified2DFloatFloatLevel:
3355 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3357 case NVPTXISD::TexUnified2DFloatFloatGrad:
3358 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3360 case NVPTXISD::TexUnified2DS32S32:
3361 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3363 case NVPTXISD::TexUnified2DS32Float:
3364 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3366 case NVPTXISD::TexUnified2DS32FloatLevel:
3367 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3369 case NVPTXISD::TexUnified2DS32FloatGrad:
3370 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3372 case NVPTXISD::TexUnified2DU32S32:
3373 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3375 case NVPTXISD::TexUnified2DU32Float:
3376 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3378 case NVPTXISD::TexUnified2DU32FloatLevel:
3379 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3381 case NVPTXISD::TexUnified2DU32FloatGrad:
3382 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3384 case NVPTXISD::TexUnified2DArrayFloatS32:
3385 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3387 case NVPTXISD::TexUnified2DArrayFloatFloat:
3388 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3390 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3391 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3393 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3394 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3396 case NVPTXISD::TexUnified2DArrayS32S32:
3397 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3399 case NVPTXISD::TexUnified2DArrayS32Float:
3400 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3402 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3403 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3405 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3406 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3408 case NVPTXISD::TexUnified2DArrayU32S32:
3409 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3411 case NVPTXISD::TexUnified2DArrayU32Float:
3412 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3414 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3415 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3417 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3418 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3420 case NVPTXISD::TexUnified3DFloatS32:
3421 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3423 case NVPTXISD::TexUnified3DFloatFloat:
3424 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3426 case NVPTXISD::TexUnified3DFloatFloatLevel:
3427 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3429 case NVPTXISD::TexUnified3DFloatFloatGrad:
3430 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3432 case NVPTXISD::TexUnified3DS32S32:
3433 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3435 case NVPTXISD::TexUnified3DS32Float:
3436 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3438 case NVPTXISD::TexUnified3DS32FloatLevel:
3439 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3441 case NVPTXISD::TexUnified3DS32FloatGrad:
3442 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3444 case NVPTXISD::TexUnified3DU32S32:
3445 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3447 case NVPTXISD::TexUnified3DU32Float:
3448 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3450 case NVPTXISD::TexUnified3DU32FloatLevel:
3451 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3453 case NVPTXISD::TexUnified3DU32FloatGrad:
3454 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3456 case NVPTXISD::TexUnifiedCubeFloatFloat:
3457 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3459 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3460 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3462 case NVPTXISD::TexUnifiedCubeS32Float:
3463 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3465 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3466 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3468 case NVPTXISD::TexUnifiedCubeU32Float:
3469 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3471 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3472 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3474 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3475 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3477 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3478 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3480 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3481 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3483 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3484 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3486 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3487 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3489 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3490 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3492 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3493 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3495 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3496 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3498 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3499 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3501 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3502 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3504 case NVPTXISD::Tld4UnifiedR2DS64Float:
3505 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3507 case NVPTXISD::Tld4UnifiedG2DS64Float:
3508 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3510 case NVPTXISD::Tld4UnifiedB2DS64Float:
3511 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3513 case NVPTXISD::Tld4UnifiedA2DS64Float:
3514 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3516 case NVPTXISD::Tld4UnifiedR2DU64Float:
3517 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3519 case NVPTXISD::Tld4UnifiedG2DU64Float:
3520 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3522 case NVPTXISD::Tld4UnifiedB2DU64Float:
3523 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3525 case NVPTXISD::Tld4UnifiedA2DU64Float:
3526 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3530 // Copy over operands
3531 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3532 Ops.push_back(N->getOperand(i));
3535 Ops.push_back(Chain);
3536 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3540 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3541 SDValue Chain = N->getOperand(0);
3542 SDValue TexHandle = N->getOperand(1);
3543 SDNode *Ret = nullptr;
3545 SmallVector<SDValue, 8> Ops;
3546 switch (N->getOpcode()) {
3547 default: return nullptr;
3548 case NVPTXISD::Suld1DI8Clamp:
3549 Opc = NVPTX::SULD_1D_I8_CLAMP;
3550 Ops.push_back(TexHandle);
3551 Ops.push_back(N->getOperand(2));
3552 Ops.push_back(Chain);
3554 case NVPTXISD::Suld1DI16Clamp:
3555 Opc = NVPTX::SULD_1D_I16_CLAMP;
3556 Ops.push_back(TexHandle);
3557 Ops.push_back(N->getOperand(2));
3558 Ops.push_back(Chain);
3560 case NVPTXISD::Suld1DI32Clamp:
3561 Opc = NVPTX::SULD_1D_I32_CLAMP;
3562 Ops.push_back(TexHandle);
3563 Ops.push_back(N->getOperand(2));
3564 Ops.push_back(Chain);
3566 case NVPTXISD::Suld1DI64Clamp:
3567 Opc = NVPTX::SULD_1D_I64_CLAMP;
3568 Ops.push_back(TexHandle);
3569 Ops.push_back(N->getOperand(2));
3570 Ops.push_back(Chain);
3572 case NVPTXISD::Suld1DV2I8Clamp:
3573 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3574 Ops.push_back(TexHandle);
3575 Ops.push_back(N->getOperand(2));
3576 Ops.push_back(Chain);
3578 case NVPTXISD::Suld1DV2I16Clamp:
3579 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3580 Ops.push_back(TexHandle);
3581 Ops.push_back(N->getOperand(2));
3582 Ops.push_back(Chain);
3584 case NVPTXISD::Suld1DV2I32Clamp:
3585 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3586 Ops.push_back(TexHandle);
3587 Ops.push_back(N->getOperand(2));
3588 Ops.push_back(Chain);
3590 case NVPTXISD::Suld1DV2I64Clamp:
3591 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3592 Ops.push_back(TexHandle);
3593 Ops.push_back(N->getOperand(2));
3594 Ops.push_back(Chain);
3596 case NVPTXISD::Suld1DV4I8Clamp:
3597 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3598 Ops.push_back(TexHandle);
3599 Ops.push_back(N->getOperand(2));
3600 Ops.push_back(Chain);
3602 case NVPTXISD::Suld1DV4I16Clamp:
3603 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3604 Ops.push_back(TexHandle);
3605 Ops.push_back(N->getOperand(2));
3606 Ops.push_back(Chain);
3608 case NVPTXISD::Suld1DV4I32Clamp:
3609 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3610 Ops.push_back(TexHandle);
3611 Ops.push_back(N->getOperand(2));
3612 Ops.push_back(Chain);
3614 case NVPTXISD::Suld1DArrayI8Clamp:
3615 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3616 Ops.push_back(TexHandle);
3617 Ops.push_back(N->getOperand(2));
3618 Ops.push_back(N->getOperand(3));
3619 Ops.push_back(Chain);
3621 case NVPTXISD::Suld1DArrayI16Clamp:
3622 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3623 Ops.push_back(TexHandle);
3624 Ops.push_back(N->getOperand(2));
3625 Ops.push_back(N->getOperand(3));
3626 Ops.push_back(Chain);
3628 case NVPTXISD::Suld1DArrayI32Clamp:
3629 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3630 Ops.push_back(TexHandle);
3631 Ops.push_back(N->getOperand(2));
3632 Ops.push_back(N->getOperand(3));
3633 Ops.push_back(Chain);
3635 case NVPTXISD::Suld1DArrayI64Clamp:
3636 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3637 Ops.push_back(TexHandle);
3638 Ops.push_back(N->getOperand(2));
3639 Ops.push_back(N->getOperand(3));
3640 Ops.push_back(Chain);
3642 case NVPTXISD::Suld1DArrayV2I8Clamp:
3643 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3644 Ops.push_back(TexHandle);
3645 Ops.push_back(N->getOperand(2));
3646 Ops.push_back(N->getOperand(3));
3647 Ops.push_back(Chain);
3649 case NVPTXISD::Suld1DArrayV2I16Clamp:
3650 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3651 Ops.push_back(TexHandle);
3652 Ops.push_back(N->getOperand(2));
3653 Ops.push_back(N->getOperand(3));
3654 Ops.push_back(Chain);
3656 case NVPTXISD::Suld1DArrayV2I32Clamp:
3657 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3658 Ops.push_back(TexHandle);
3659 Ops.push_back(N->getOperand(2));
3660 Ops.push_back(N->getOperand(3));
3661 Ops.push_back(Chain);
3663 case NVPTXISD::Suld1DArrayV2I64Clamp:
3664 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3665 Ops.push_back(TexHandle);
3666 Ops.push_back(N->getOperand(2));
3667 Ops.push_back(N->getOperand(3));
3668 Ops.push_back(Chain);
3670 case NVPTXISD::Suld1DArrayV4I8Clamp:
3671 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3672 Ops.push_back(TexHandle);
3673 Ops.push_back(N->getOperand(2));
3674 Ops.push_back(N->getOperand(3));
3675 Ops.push_back(Chain);
3677 case NVPTXISD::Suld1DArrayV4I16Clamp:
3678 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3679 Ops.push_back(TexHandle);
3680 Ops.push_back(N->getOperand(2));
3681 Ops.push_back(N->getOperand(3));
3682 Ops.push_back(Chain);
3684 case NVPTXISD::Suld1DArrayV4I32Clamp:
3685 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3686 Ops.push_back(TexHandle);
3687 Ops.push_back(N->getOperand(2));
3688 Ops.push_back(N->getOperand(3));
3689 Ops.push_back(Chain);
3691 case NVPTXISD::Suld2DI8Clamp:
3692 Opc = NVPTX::SULD_2D_I8_CLAMP;
3693 Ops.push_back(TexHandle);
3694 Ops.push_back(N->getOperand(2));
3695 Ops.push_back(N->getOperand(3));
3696 Ops.push_back(Chain);
3698 case NVPTXISD::Suld2DI16Clamp:
3699 Opc = NVPTX::SULD_2D_I16_CLAMP;
3700 Ops.push_back(TexHandle);
3701 Ops.push_back(N->getOperand(2));
3702 Ops.push_back(N->getOperand(3));
3703 Ops.push_back(Chain);
3705 case NVPTXISD::Suld2DI32Clamp:
3706 Opc = NVPTX::SULD_2D_I32_CLAMP;
3707 Ops.push_back(TexHandle);
3708 Ops.push_back(N->getOperand(2));
3709 Ops.push_back(N->getOperand(3));
3710 Ops.push_back(Chain);
3712 case NVPTXISD::Suld2DI64Clamp:
3713 Opc = NVPTX::SULD_2D_I64_CLAMP;
3714 Ops.push_back(TexHandle);
3715 Ops.push_back(N->getOperand(2));
3716 Ops.push_back(N->getOperand(3));
3717 Ops.push_back(Chain);
3719 case NVPTXISD::Suld2DV2I8Clamp:
3720 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3721 Ops.push_back(TexHandle);
3722 Ops.push_back(N->getOperand(2));
3723 Ops.push_back(N->getOperand(3));
3724 Ops.push_back(Chain);
3726 case NVPTXISD::Suld2DV2I16Clamp:
3727 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3728 Ops.push_back(TexHandle);
3729 Ops.push_back(N->getOperand(2));
3730 Ops.push_back(N->getOperand(3));
3731 Ops.push_back(Chain);
3733 case NVPTXISD::Suld2DV2I32Clamp:
3734 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3735 Ops.push_back(TexHandle);
3736 Ops.push_back(N->getOperand(2));
3737 Ops.push_back(N->getOperand(3));
3738 Ops.push_back(Chain);
3740 case NVPTXISD::Suld2DV2I64Clamp:
3741 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3742 Ops.push_back(TexHandle);
3743 Ops.push_back(N->getOperand(2));
3744 Ops.push_back(N->getOperand(3));
3745 Ops.push_back(Chain);
3747 case NVPTXISD::Suld2DV4I8Clamp:
3748 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3749 Ops.push_back(TexHandle);
3750 Ops.push_back(N->getOperand(2));
3751 Ops.push_back(N->getOperand(3));
3752 Ops.push_back(Chain);
3754 case NVPTXISD::Suld2DV4I16Clamp:
3755 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3756 Ops.push_back(TexHandle);
3757 Ops.push_back(N->getOperand(2));
3758 Ops.push_back(N->getOperand(3));
3759 Ops.push_back(Chain);
3761 case NVPTXISD::Suld2DV4I32Clamp:
3762 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3763 Ops.push_back(TexHandle);
3764 Ops.push_back(N->getOperand(2));
3765 Ops.push_back(N->getOperand(3));
3766 Ops.push_back(Chain);
3768 case NVPTXISD::Suld2DArrayI8Clamp:
3769 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3770 Ops.push_back(TexHandle);
3771 Ops.push_back(N->getOperand(2));
3772 Ops.push_back(N->getOperand(3));
3773 Ops.push_back(N->getOperand(4));
3774 Ops.push_back(Chain);
3776 case NVPTXISD::Suld2DArrayI16Clamp:
3777 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3778 Ops.push_back(TexHandle);
3779 Ops.push_back(N->getOperand(2));
3780 Ops.push_back(N->getOperand(3));
3781 Ops.push_back(N->getOperand(4));
3782 Ops.push_back(Chain);
3784 case NVPTXISD::Suld2DArrayI32Clamp:
3785 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3786 Ops.push_back(TexHandle);
3787 Ops.push_back(N->getOperand(2));
3788 Ops.push_back(N->getOperand(3));
3789 Ops.push_back(N->getOperand(4));
3790 Ops.push_back(Chain);
3792 case NVPTXISD::Suld2DArrayI64Clamp:
3793 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3794 Ops.push_back(TexHandle);
3795 Ops.push_back(N->getOperand(2));
3796 Ops.push_back(N->getOperand(3));
3797 Ops.push_back(N->getOperand(4));
3798 Ops.push_back(Chain);
3800 case NVPTXISD::Suld2DArrayV2I8Clamp:
3801 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3802 Ops.push_back(TexHandle);
3803 Ops.push_back(N->getOperand(2));
3804 Ops.push_back(N->getOperand(3));
3805 Ops.push_back(N->getOperand(4));
3806 Ops.push_back(Chain);
3808 case NVPTXISD::Suld2DArrayV2I16Clamp:
3809 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3810 Ops.push_back(TexHandle);
3811 Ops.push_back(N->getOperand(2));
3812 Ops.push_back(N->getOperand(3));
3813 Ops.push_back(N->getOperand(4));
3814 Ops.push_back(Chain);
3816 case NVPTXISD::Suld2DArrayV2I32Clamp:
3817 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3818 Ops.push_back(TexHandle);
3819 Ops.push_back(N->getOperand(2));
3820 Ops.push_back(N->getOperand(3));
3821 Ops.push_back(N->getOperand(4));
3822 Ops.push_back(Chain);
3824 case NVPTXISD::Suld2DArrayV2I64Clamp:
3825 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3826 Ops.push_back(TexHandle);
3827 Ops.push_back(N->getOperand(2));
3828 Ops.push_back(N->getOperand(3));
3829 Ops.push_back(N->getOperand(4));
3830 Ops.push_back(Chain);
3832 case NVPTXISD::Suld2DArrayV4I8Clamp:
3833 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3834 Ops.push_back(TexHandle);
3835 Ops.push_back(N->getOperand(2));
3836 Ops.push_back(N->getOperand(3));
3837 Ops.push_back(N->getOperand(4));
3838 Ops.push_back(Chain);
3840 case NVPTXISD::Suld2DArrayV4I16Clamp:
3841 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3842 Ops.push_back(TexHandle);
3843 Ops.push_back(N->getOperand(2));
3844 Ops.push_back(N->getOperand(3));
3845 Ops.push_back(N->getOperand(4));
3846 Ops.push_back(Chain);
3848 case NVPTXISD::Suld2DArrayV4I32Clamp:
3849 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3850 Ops.push_back(TexHandle);
3851 Ops.push_back(N->getOperand(2));
3852 Ops.push_back(N->getOperand(3));
3853 Ops.push_back(N->getOperand(4));
3854 Ops.push_back(Chain);
3856 case NVPTXISD::Suld3DI8Clamp:
3857 Opc = NVPTX::SULD_3D_I8_CLAMP;
3858 Ops.push_back(TexHandle);
3859 Ops.push_back(N->getOperand(2));
3860 Ops.push_back(N->getOperand(3));
3861 Ops.push_back(N->getOperand(4));
3862 Ops.push_back(Chain);
3864 case NVPTXISD::Suld3DI16Clamp:
3865 Opc = NVPTX::SULD_3D_I16_CLAMP;
3866 Ops.push_back(TexHandle);
3867 Ops.push_back(N->getOperand(2));
3868 Ops.push_back(N->getOperand(3));
3869 Ops.push_back(N->getOperand(4));
3870 Ops.push_back(Chain);
3872 case NVPTXISD::Suld3DI32Clamp:
3873 Opc = NVPTX::SULD_3D_I32_CLAMP;
3874 Ops.push_back(TexHandle);
3875 Ops.push_back(N->getOperand(2));
3876 Ops.push_back(N->getOperand(3));
3877 Ops.push_back(N->getOperand(4));
3878 Ops.push_back(Chain);
3880 case NVPTXISD::Suld3DI64Clamp:
3881 Opc = NVPTX::SULD_3D_I64_CLAMP;
3882 Ops.push_back(TexHandle);
3883 Ops.push_back(N->getOperand(2));
3884 Ops.push_back(N->getOperand(3));
3885 Ops.push_back(N->getOperand(4));
3886 Ops.push_back(Chain);
3888 case NVPTXISD::Suld3DV2I8Clamp:
3889 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3890 Ops.push_back(TexHandle);
3891 Ops.push_back(N->getOperand(2));
3892 Ops.push_back(N->getOperand(3));
3893 Ops.push_back(N->getOperand(4));
3894 Ops.push_back(Chain);
3896 case NVPTXISD::Suld3DV2I16Clamp:
3897 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3898 Ops.push_back(TexHandle);
3899 Ops.push_back(N->getOperand(2));
3900 Ops.push_back(N->getOperand(3));
3901 Ops.push_back(N->getOperand(4));
3902 Ops.push_back(Chain);
3904 case NVPTXISD::Suld3DV2I32Clamp:
3905 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3906 Ops.push_back(TexHandle);
3907 Ops.push_back(N->getOperand(2));
3908 Ops.push_back(N->getOperand(3));
3909 Ops.push_back(N->getOperand(4));
3910 Ops.push_back(Chain);
3912 case NVPTXISD::Suld3DV2I64Clamp:
3913 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3914 Ops.push_back(TexHandle);
3915 Ops.push_back(N->getOperand(2));
3916 Ops.push_back(N->getOperand(3));
3917 Ops.push_back(N->getOperand(4));
3918 Ops.push_back(Chain);
3920 case NVPTXISD::Suld3DV4I8Clamp:
3921 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3922 Ops.push_back(TexHandle);
3923 Ops.push_back(N->getOperand(2));
3924 Ops.push_back(N->getOperand(3));
3925 Ops.push_back(N->getOperand(4));
3926 Ops.push_back(Chain);
3928 case NVPTXISD::Suld3DV4I16Clamp:
3929 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3930 Ops.push_back(TexHandle);
3931 Ops.push_back(N->getOperand(2));
3932 Ops.push_back(N->getOperand(3));
3933 Ops.push_back(N->getOperand(4));
3934 Ops.push_back(Chain);
3936 case NVPTXISD::Suld3DV4I32Clamp:
3937 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3938 Ops.push_back(TexHandle);
3939 Ops.push_back(N->getOperand(2));
3940 Ops.push_back(N->getOperand(3));
3941 Ops.push_back(N->getOperand(4));
3942 Ops.push_back(Chain);
3944 case NVPTXISD::Suld1DI8Trap:
3945 Opc = NVPTX::SULD_1D_I8_TRAP;
3946 Ops.push_back(TexHandle);
3947 Ops.push_back(N->getOperand(2));
3948 Ops.push_back(Chain);
3950 case NVPTXISD::Suld1DI16Trap:
3951 Opc = NVPTX::SULD_1D_I16_TRAP;
3952 Ops.push_back(TexHandle);
3953 Ops.push_back(N->getOperand(2));
3954 Ops.push_back(Chain);
3956 case NVPTXISD::Suld1DI32Trap:
3957 Opc = NVPTX::SULD_1D_I32_TRAP;
3958 Ops.push_back(TexHandle);
3959 Ops.push_back(N->getOperand(2));
3960 Ops.push_back(Chain);
3962 case NVPTXISD::Suld1DI64Trap:
3963 Opc = NVPTX::SULD_1D_I64_TRAP;
3964 Ops.push_back(TexHandle);
3965 Ops.push_back(N->getOperand(2));
3966 Ops.push_back(Chain);
3968 case NVPTXISD::Suld1DV2I8Trap:
3969 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3970 Ops.push_back(TexHandle);
3971 Ops.push_back(N->getOperand(2));
3972 Ops.push_back(Chain);
3974 case NVPTXISD::Suld1DV2I16Trap:
3975 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3976 Ops.push_back(TexHandle);
3977 Ops.push_back(N->getOperand(2));
3978 Ops.push_back(Chain);
3980 case NVPTXISD::Suld1DV2I32Trap:
3981 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3982 Ops.push_back(TexHandle);
3983 Ops.push_back(N->getOperand(2));
3984 Ops.push_back(Chain);
3986 case NVPTXISD::Suld1DV2I64Trap:
3987 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3988 Ops.push_back(TexHandle);
3989 Ops.push_back(N->getOperand(2));
3990 Ops.push_back(Chain);
3992 case NVPTXISD::Suld1DV4I8Trap:
3993 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3994 Ops.push_back(TexHandle);
3995 Ops.push_back(N->getOperand(2));
3996 Ops.push_back(Chain);
3998 case NVPTXISD::Suld1DV4I16Trap:
3999 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4000 Ops.push_back(TexHandle);
4001 Ops.push_back(N->getOperand(2));
4002 Ops.push_back(Chain);
4004 case NVPTXISD::Suld1DV4I32Trap:
4005 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4006 Ops.push_back(TexHandle);
4007 Ops.push_back(N->getOperand(2));
4008 Ops.push_back(Chain);
4010 case NVPTXISD::Suld1DArrayI8Trap:
4011 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4012 Ops.push_back(TexHandle);
4013 Ops.push_back(N->getOperand(2));
4014 Ops.push_back(N->getOperand(3));
4015 Ops.push_back(Chain);
4017 case NVPTXISD::Suld1DArrayI16Trap:
4018 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4019 Ops.push_back(TexHandle);
4020 Ops.push_back(N->getOperand(2));
4021 Ops.push_back(N->getOperand(3));
4022 Ops.push_back(Chain);
4024 case NVPTXISD::Suld1DArrayI32Trap:
4025 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4026 Ops.push_back(TexHandle);
4027 Ops.push_back(N->getOperand(2));
4028 Ops.push_back(N->getOperand(3));
4029 Ops.push_back(Chain);
4031 case NVPTXISD::Suld1DArrayI64Trap:
4032 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4033 Ops.push_back(TexHandle);
4034 Ops.push_back(N->getOperand(2));
4035 Ops.push_back(N->getOperand(3));
4036 Ops.push_back(Chain);
4038 case NVPTXISD::Suld1DArrayV2I8Trap:
4039 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4040 Ops.push_back(TexHandle);
4041 Ops.push_back(N->getOperand(2));
4042 Ops.push_back(N->getOperand(3));
4043 Ops.push_back(Chain);
4045 case NVPTXISD::Suld1DArrayV2I16Trap:
4046 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4047 Ops.push_back(TexHandle);
4048 Ops.push_back(N->getOperand(2));
4049 Ops.push_back(N->getOperand(3));
4050 Ops.push_back(Chain);
4052 case NVPTXISD::Suld1DArrayV2I32Trap:
4053 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4054 Ops.push_back(TexHandle);
4055 Ops.push_back(N->getOperand(2));
4056 Ops.push_back(N->getOperand(3));
4057 Ops.push_back(Chain);
4059 case NVPTXISD::Suld1DArrayV2I64Trap:
4060 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4061 Ops.push_back(TexHandle);
4062 Ops.push_back(N->getOperand(2));
4063 Ops.push_back(N->getOperand(3));
4064 Ops.push_back(Chain);
4066 case NVPTXISD::Suld1DArrayV4I8Trap:
4067 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4068 Ops.push_back(TexHandle);
4069 Ops.push_back(N->getOperand(2));
4070 Ops.push_back(N->getOperand(3));
4071 Ops.push_back(Chain);
4073 case NVPTXISD::Suld1DArrayV4I16Trap:
4074 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4075 Ops.push_back(TexHandle);
4076 Ops.push_back(N->getOperand(2));
4077 Ops.push_back(N->getOperand(3));
4078 Ops.push_back(Chain);
4080 case NVPTXISD::Suld1DArrayV4I32Trap:
4081 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4082 Ops.push_back(TexHandle);
4083 Ops.push_back(N->getOperand(2));
4084 Ops.push_back(N->getOperand(3));
4085 Ops.push_back(Chain);
4087 case NVPTXISD::Suld2DI8Trap:
4088 Opc = NVPTX::SULD_2D_I8_TRAP;
4089 Ops.push_back(TexHandle);
4090 Ops.push_back(N->getOperand(2));
4091 Ops.push_back(N->getOperand(3));
4092 Ops.push_back(Chain);
4094 case NVPTXISD::Suld2DI16Trap:
4095 Opc = NVPTX::SULD_2D_I16_TRAP;
4096 Ops.push_back(TexHandle);
4097 Ops.push_back(N->getOperand(2));
4098 Ops.push_back(N->getOperand(3));
4099 Ops.push_back(Chain);
4101 case NVPTXISD::Suld2DI32Trap:
4102 Opc = NVPTX::SULD_2D_I32_TRAP;
4103 Ops.push_back(TexHandle);
4104 Ops.push_back(N->getOperand(2));
4105 Ops.push_back(N->getOperand(3));
4106 Ops.push_back(Chain);
4108 case NVPTXISD::Suld2DI64Trap:
4109 Opc = NVPTX::SULD_2D_I64_TRAP;
4110 Ops.push_back(TexHandle);
4111 Ops.push_back(N->getOperand(2));
4112 Ops.push_back(N->getOperand(3));
4113 Ops.push_back(Chain);
4115 case NVPTXISD::Suld2DV2I8Trap:
4116 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4117 Ops.push_back(TexHandle);
4118 Ops.push_back(N->getOperand(2));
4119 Ops.push_back(N->getOperand(3));
4120 Ops.push_back(Chain);
4122 case NVPTXISD::Suld2DV2I16Trap:
4123 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4124 Ops.push_back(TexHandle);
4125 Ops.push_back(N->getOperand(2));
4126 Ops.push_back(N->getOperand(3));
4127 Ops.push_back(Chain);
4129 case NVPTXISD::Suld2DV2I32Trap:
4130 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4131 Ops.push_back(TexHandle);
4132 Ops.push_back(N->getOperand(2));
4133 Ops.push_back(N->getOperand(3));
4134 Ops.push_back(Chain);
4136 case NVPTXISD::Suld2DV2I64Trap:
4137 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4138 Ops.push_back(TexHandle);
4139 Ops.push_back(N->getOperand(2));
4140 Ops.push_back(N->getOperand(3));
4141 Ops.push_back(Chain);
4143 case NVPTXISD::Suld2DV4I8Trap:
4144 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4145 Ops.push_back(TexHandle);
4146 Ops.push_back(N->getOperand(2));
4147 Ops.push_back(N->getOperand(3));
4148 Ops.push_back(Chain);
4150 case NVPTXISD::Suld2DV4I16Trap:
4151 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4152 Ops.push_back(TexHandle);
4153 Ops.push_back(N->getOperand(2));
4154 Ops.push_back(N->getOperand(3));
4155 Ops.push_back(Chain);
4157 case NVPTXISD::Suld2DV4I32Trap:
4158 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4159 Ops.push_back(TexHandle);
4160 Ops.push_back(N->getOperand(2));
4161 Ops.push_back(N->getOperand(3));
4162 Ops.push_back(Chain);
4164 case NVPTXISD::Suld2DArrayI8Trap:
4165 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4166 Ops.push_back(TexHandle);
4167 Ops.push_back(N->getOperand(2));
4168 Ops.push_back(N->getOperand(3));
4169 Ops.push_back(N->getOperand(4));
4170 Ops.push_back(Chain);
4172 case NVPTXISD::Suld2DArrayI16Trap:
4173 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4174 Ops.push_back(TexHandle);
4175 Ops.push_back(N->getOperand(2));
4176 Ops.push_back(N->getOperand(3));
4177 Ops.push_back(N->getOperand(4));
4178 Ops.push_back(Chain);
4180 case NVPTXISD::Suld2DArrayI32Trap:
4181 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4182 Ops.push_back(TexHandle);
4183 Ops.push_back(N->getOperand(2));
4184 Ops.push_back(N->getOperand(3));
4185 Ops.push_back(N->getOperand(4));
4186 Ops.push_back(Chain);
4188 case NVPTXISD::Suld2DArrayI64Trap:
4189 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4190 Ops.push_back(TexHandle);
4191 Ops.push_back(N->getOperand(2));
4192 Ops.push_back(N->getOperand(3));
4193 Ops.push_back(N->getOperand(4));
4194 Ops.push_back(Chain);
4196 case NVPTXISD::Suld2DArrayV2I8Trap:
4197 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4198 Ops.push_back(TexHandle);
4199 Ops.push_back(N->getOperand(2));
4200 Ops.push_back(N->getOperand(3));
4201 Ops.push_back(N->getOperand(4));
4202 Ops.push_back(Chain);
4204 case NVPTXISD::Suld2DArrayV2I16Trap:
4205 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4206 Ops.push_back(TexHandle);
4207 Ops.push_back(N->getOperand(2));
4208 Ops.push_back(N->getOperand(3));
4209 Ops.push_back(N->getOperand(4));
4210 Ops.push_back(Chain);
4212 case NVPTXISD::Suld2DArrayV2I32Trap:
4213 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4214 Ops.push_back(TexHandle);
4215 Ops.push_back(N->getOperand(2));
4216 Ops.push_back(N->getOperand(3));
4217 Ops.push_back(N->getOperand(4));
4218 Ops.push_back(Chain);
4220 case NVPTXISD::Suld2DArrayV2I64Trap:
4221 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4222 Ops.push_back(TexHandle);
4223 Ops.push_back(N->getOperand(2));
4224 Ops.push_back(N->getOperand(3));
4225 Ops.push_back(N->getOperand(4));
4226 Ops.push_back(Chain);
4228 case NVPTXISD::Suld2DArrayV4I8Trap:
4229 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4230 Ops.push_back(TexHandle);
4231 Ops.push_back(N->getOperand(2));
4232 Ops.push_back(N->getOperand(3));
4233 Ops.push_back(N->getOperand(4));
4234 Ops.push_back(Chain);
4236 case NVPTXISD::Suld2DArrayV4I16Trap:
4237 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4238 Ops.push_back(TexHandle);
4239 Ops.push_back(N->getOperand(2));
4240 Ops.push_back(N->getOperand(3));
4241 Ops.push_back(N->getOperand(4));
4242 Ops.push_back(Chain);
4244 case NVPTXISD::Suld2DArrayV4I32Trap:
4245 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4246 Ops.push_back(TexHandle);
4247 Ops.push_back(N->getOperand(2));
4248 Ops.push_back(N->getOperand(3));
4249 Ops.push_back(N->getOperand(4));
4250 Ops.push_back(Chain);
4252 case NVPTXISD::Suld3DI8Trap:
4253 Opc = NVPTX::SULD_3D_I8_TRAP;
4254 Ops.push_back(TexHandle);
4255 Ops.push_back(N->getOperand(2));
4256 Ops.push_back(N->getOperand(3));
4257 Ops.push_back(N->getOperand(4));
4258 Ops.push_back(Chain);
4260 case NVPTXISD::Suld3DI16Trap:
4261 Opc = NVPTX::SULD_3D_I16_TRAP;
4262 Ops.push_back(TexHandle);
4263 Ops.push_back(N->getOperand(2));
4264 Ops.push_back(N->getOperand(3));
4265 Ops.push_back(N->getOperand(4));
4266 Ops.push_back(Chain);
4268 case NVPTXISD::Suld3DI32Trap:
4269 Opc = NVPTX::SULD_3D_I32_TRAP;
4270 Ops.push_back(TexHandle);
4271 Ops.push_back(N->getOperand(2));
4272 Ops.push_back(N->getOperand(3));
4273 Ops.push_back(N->getOperand(4));
4274 Ops.push_back(Chain);
4276 case NVPTXISD::Suld3DI64Trap:
4277 Opc = NVPTX::SULD_3D_I64_TRAP;
4278 Ops.push_back(TexHandle);
4279 Ops.push_back(N->getOperand(2));
4280 Ops.push_back(N->getOperand(3));
4281 Ops.push_back(N->getOperand(4));
4282 Ops.push_back(Chain);
4284 case NVPTXISD::Suld3DV2I8Trap:
4285 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4286 Ops.push_back(TexHandle);
4287 Ops.push_back(N->getOperand(2));
4288 Ops.push_back(N->getOperand(3));
4289 Ops.push_back(N->getOperand(4));
4290 Ops.push_back(Chain);
4292 case NVPTXISD::Suld3DV2I16Trap:
4293 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4294 Ops.push_back(TexHandle);
4295 Ops.push_back(N->getOperand(2));
4296 Ops.push_back(N->getOperand(3));
4297 Ops.push_back(N->getOperand(4));
4298 Ops.push_back(Chain);
4300 case NVPTXISD::Suld3DV2I32Trap:
4301 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4302 Ops.push_back(TexHandle);
4303 Ops.push_back(N->getOperand(2));
4304 Ops.push_back(N->getOperand(3));
4305 Ops.push_back(N->getOperand(4));
4306 Ops.push_back(Chain);
4308 case NVPTXISD::Suld3DV2I64Trap:
4309 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4310 Ops.push_back(TexHandle);
4311 Ops.push_back(N->getOperand(2));
4312 Ops.push_back(N->getOperand(3));
4313 Ops.push_back(N->getOperand(4));
4314 Ops.push_back(Chain);
4316 case NVPTXISD::Suld3DV4I8Trap:
4317 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4318 Ops.push_back(TexHandle);
4319 Ops.push_back(N->getOperand(2));
4320 Ops.push_back(N->getOperand(3));
4321 Ops.push_back(N->getOperand(4));
4322 Ops.push_back(Chain);
4324 case NVPTXISD::Suld3DV4I16Trap:
4325 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4326 Ops.push_back(TexHandle);
4327 Ops.push_back(N->getOperand(2));
4328 Ops.push_back(N->getOperand(3));
4329 Ops.push_back(N->getOperand(4));
4330 Ops.push_back(Chain);
4332 case NVPTXISD::Suld3DV4I32Trap:
4333 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4334 Ops.push_back(TexHandle);
4335 Ops.push_back(N->getOperand(2));
4336 Ops.push_back(N->getOperand(3));
4337 Ops.push_back(N->getOperand(4));
4338 Ops.push_back(Chain);
4340 case NVPTXISD::Suld1DI8Zero:
4341 Opc = NVPTX::SULD_1D_I8_ZERO;
4342 Ops.push_back(TexHandle);
4343 Ops.push_back(N->getOperand(2));
4344 Ops.push_back(Chain);
4346 case NVPTXISD::Suld1DI16Zero:
4347 Opc = NVPTX::SULD_1D_I16_ZERO;
4348 Ops.push_back(TexHandle);
4349 Ops.push_back(N->getOperand(2));
4350 Ops.push_back(Chain);
4352 case NVPTXISD::Suld1DI32Zero:
4353 Opc = NVPTX::SULD_1D_I32_ZERO;
4354 Ops.push_back(TexHandle);
4355 Ops.push_back(N->getOperand(2));
4356 Ops.push_back(Chain);
4358 case NVPTXISD::Suld1DI64Zero:
4359 Opc = NVPTX::SULD_1D_I64_ZERO;
4360 Ops.push_back(TexHandle);
4361 Ops.push_back(N->getOperand(2));
4362 Ops.push_back(Chain);
4364 case NVPTXISD::Suld1DV2I8Zero:
4365 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4366 Ops.push_back(TexHandle);
4367 Ops.push_back(N->getOperand(2));
4368 Ops.push_back(Chain);
4370 case NVPTXISD::Suld1DV2I16Zero:
4371 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4372 Ops.push_back(TexHandle);
4373 Ops.push_back(N->getOperand(2));
4374 Ops.push_back(Chain);
4376 case NVPTXISD::Suld1DV2I32Zero:
4377 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4378 Ops.push_back(TexHandle);
4379 Ops.push_back(N->getOperand(2));
4380 Ops.push_back(Chain);
4382 case NVPTXISD::Suld1DV2I64Zero:
4383 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4384 Ops.push_back(TexHandle);
4385 Ops.push_back(N->getOperand(2));
4386 Ops.push_back(Chain);
4388 case NVPTXISD::Suld1DV4I8Zero:
4389 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4390 Ops.push_back(TexHandle);
4391 Ops.push_back(N->getOperand(2));
4392 Ops.push_back(Chain);
4394 case NVPTXISD::Suld1DV4I16Zero:
4395 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4396 Ops.push_back(TexHandle);
4397 Ops.push_back(N->getOperand(2));
4398 Ops.push_back(Chain);
4400 case NVPTXISD::Suld1DV4I32Zero:
4401 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4402 Ops.push_back(TexHandle);
4403 Ops.push_back(N->getOperand(2));
4404 Ops.push_back(Chain);
4406 case NVPTXISD::Suld1DArrayI8Zero:
4407 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4408 Ops.push_back(TexHandle);
4409 Ops.push_back(N->getOperand(2));
4410 Ops.push_back(N->getOperand(3));
4411 Ops.push_back(Chain);
4413 case NVPTXISD::Suld1DArrayI16Zero:
4414 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4415 Ops.push_back(TexHandle);
4416 Ops.push_back(N->getOperand(2));
4417 Ops.push_back(N->getOperand(3));
4418 Ops.push_back(Chain);
4420 case NVPTXISD::Suld1DArrayI32Zero:
4421 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4422 Ops.push_back(TexHandle);
4423 Ops.push_back(N->getOperand(2));
4424 Ops.push_back(N->getOperand(3));
4425 Ops.push_back(Chain);
4427 case NVPTXISD::Suld1DArrayI64Zero:
4428 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4429 Ops.push_back(TexHandle);
4430 Ops.push_back(N->getOperand(2));
4431 Ops.push_back(N->getOperand(3));
4432 Ops.push_back(Chain);
4434 case NVPTXISD::Suld1DArrayV2I8Zero:
4435 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4436 Ops.push_back(TexHandle);
4437 Ops.push_back(N->getOperand(2));
4438 Ops.push_back(N->getOperand(3));
4439 Ops.push_back(Chain);
4441 case NVPTXISD::Suld1DArrayV2I16Zero:
4442 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4443 Ops.push_back(TexHandle);
4444 Ops.push_back(N->getOperand(2));
4445 Ops.push_back(N->getOperand(3));
4446 Ops.push_back(Chain);
4448 case NVPTXISD::Suld1DArrayV2I32Zero:
4449 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4450 Ops.push_back(TexHandle);
4451 Ops.push_back(N->getOperand(2));
4452 Ops.push_back(N->getOperand(3));
4453 Ops.push_back(Chain);
4455 case NVPTXISD::Suld1DArrayV2I64Zero:
4456 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4457 Ops.push_back(TexHandle);
4458 Ops.push_back(N->getOperand(2));
4459 Ops.push_back(N->getOperand(3));
4460 Ops.push_back(Chain);
4462 case NVPTXISD::Suld1DArrayV4I8Zero:
4463 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4464 Ops.push_back(TexHandle);
4465 Ops.push_back(N->getOperand(2));
4466 Ops.push_back(N->getOperand(3));
4467 Ops.push_back(Chain);
4469 case NVPTXISD::Suld1DArrayV4I16Zero:
4470 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4471 Ops.push_back(TexHandle);
4472 Ops.push_back(N->getOperand(2));
4473 Ops.push_back(N->getOperand(3));
4474 Ops.push_back(Chain);
4476 case NVPTXISD::Suld1DArrayV4I32Zero:
4477 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4478 Ops.push_back(TexHandle);
4479 Ops.push_back(N->getOperand(2));
4480 Ops.push_back(N->getOperand(3));
4481 Ops.push_back(Chain);
4483 case NVPTXISD::Suld2DI8Zero:
4484 Opc = NVPTX::SULD_2D_I8_ZERO;
4485 Ops.push_back(TexHandle);
4486 Ops.push_back(N->getOperand(2));
4487 Ops.push_back(N->getOperand(3));
4488 Ops.push_back(Chain);
4490 case NVPTXISD::Suld2DI16Zero:
4491 Opc = NVPTX::SULD_2D_I16_ZERO;
4492 Ops.push_back(TexHandle);
4493 Ops.push_back(N->getOperand(2));
4494 Ops.push_back(N->getOperand(3));
4495 Ops.push_back(Chain);
4497 case NVPTXISD::Suld2DI32Zero:
4498 Opc = NVPTX::SULD_2D_I32_ZERO;
4499 Ops.push_back(TexHandle);
4500 Ops.push_back(N->getOperand(2));
4501 Ops.push_back(N->getOperand(3));
4502 Ops.push_back(Chain);
4504 case NVPTXISD::Suld2DI64Zero:
4505 Opc = NVPTX::SULD_2D_I64_ZERO;
4506 Ops.push_back(TexHandle);
4507 Ops.push_back(N->getOperand(2));
4508 Ops.push_back(N->getOperand(3));
4509 Ops.push_back(Chain);
4511 case NVPTXISD::Suld2DV2I8Zero:
4512 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4513 Ops.push_back(TexHandle);
4514 Ops.push_back(N->getOperand(2));
4515 Ops.push_back(N->getOperand(3));
4516 Ops.push_back(Chain);
4518 case NVPTXISD::Suld2DV2I16Zero:
4519 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4520 Ops.push_back(TexHandle);
4521 Ops.push_back(N->getOperand(2));
4522 Ops.push_back(N->getOperand(3));
4523 Ops.push_back(Chain);
4525 case NVPTXISD::Suld2DV2I32Zero:
4526 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4527 Ops.push_back(TexHandle);
4528 Ops.push_back(N->getOperand(2));
4529 Ops.push_back(N->getOperand(3));
4530 Ops.push_back(Chain);
4532 case NVPTXISD::Suld2DV2I64Zero:
4533 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4534 Ops.push_back(TexHandle);
4535 Ops.push_back(N->getOperand(2));
4536 Ops.push_back(N->getOperand(3));
4537 Ops.push_back(Chain);
4539 case NVPTXISD::Suld2DV4I8Zero:
4540 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4541 Ops.push_back(TexHandle);
4542 Ops.push_back(N->getOperand(2));
4543 Ops.push_back(N->getOperand(3));
4544 Ops.push_back(Chain);
4546 case NVPTXISD::Suld2DV4I16Zero:
4547 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4548 Ops.push_back(TexHandle);
4549 Ops.push_back(N->getOperand(2));
4550 Ops.push_back(N->getOperand(3));
4551 Ops.push_back(Chain);
4553 case NVPTXISD::Suld2DV4I32Zero:
4554 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4555 Ops.push_back(TexHandle);
4556 Ops.push_back(N->getOperand(2));
4557 Ops.push_back(N->getOperand(3));
4558 Ops.push_back(Chain);
4560 case NVPTXISD::Suld2DArrayI8Zero:
4561 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4562 Ops.push_back(TexHandle);
4563 Ops.push_back(N->getOperand(2));
4564 Ops.push_back(N->getOperand(3));
4565 Ops.push_back(N->getOperand(4));
4566 Ops.push_back(Chain);
4568 case NVPTXISD::Suld2DArrayI16Zero:
4569 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4570 Ops.push_back(TexHandle);
4571 Ops.push_back(N->getOperand(2));
4572 Ops.push_back(N->getOperand(3));
4573 Ops.push_back(N->getOperand(4));
4574 Ops.push_back(Chain);
4576 case NVPTXISD::Suld2DArrayI32Zero:
4577 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4578 Ops.push_back(TexHandle);
4579 Ops.push_back(N->getOperand(2));
4580 Ops.push_back(N->getOperand(3));
4581 Ops.push_back(N->getOperand(4));
4582 Ops.push_back(Chain);
4584 case NVPTXISD::Suld2DArrayI64Zero:
4585 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4586 Ops.push_back(TexHandle);
4587 Ops.push_back(N->getOperand(2));
4588 Ops.push_back(N->getOperand(3));
4589 Ops.push_back(N->getOperand(4));
4590 Ops.push_back(Chain);
4592 case NVPTXISD::Suld2DArrayV2I8Zero:
4593 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4594 Ops.push_back(TexHandle);
4595 Ops.push_back(N->getOperand(2));
4596 Ops.push_back(N->getOperand(3));
4597 Ops.push_back(N->getOperand(4));
4598 Ops.push_back(Chain);
4600 case NVPTXISD::Suld2DArrayV2I16Zero:
4601 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4602 Ops.push_back(TexHandle);
4603 Ops.push_back(N->getOperand(2));
4604 Ops.push_back(N->getOperand(3));
4605 Ops.push_back(N->getOperand(4));
4606 Ops.push_back(Chain);
4608 case NVPTXISD::Suld2DArrayV2I32Zero:
4609 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4610 Ops.push_back(TexHandle);
4611 Ops.push_back(N->getOperand(2));
4612 Ops.push_back(N->getOperand(3));
4613 Ops.push_back(N->getOperand(4));
4614 Ops.push_back(Chain);
4616 case NVPTXISD::Suld2DArrayV2I64Zero:
4617 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4618 Ops.push_back(TexHandle);
4619 Ops.push_back(N->getOperand(2));
4620 Ops.push_back(N->getOperand(3));
4621 Ops.push_back(N->getOperand(4));
4622 Ops.push_back(Chain);
4624 case NVPTXISD::Suld2DArrayV4I8Zero:
4625 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4626 Ops.push_back(TexHandle);
4627 Ops.push_back(N->getOperand(2));
4628 Ops.push_back(N->getOperand(3));
4629 Ops.push_back(N->getOperand(4));
4630 Ops.push_back(Chain);
4632 case NVPTXISD::Suld2DArrayV4I16Zero:
4633 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4634 Ops.push_back(TexHandle);
4635 Ops.push_back(N->getOperand(2));
4636 Ops.push_back(N->getOperand(3));
4637 Ops.push_back(N->getOperand(4));
4638 Ops.push_back(Chain);
4640 case NVPTXISD::Suld2DArrayV4I32Zero:
4641 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4642 Ops.push_back(TexHandle);
4643 Ops.push_back(N->getOperand(2));
4644 Ops.push_back(N->getOperand(3));
4645 Ops.push_back(N->getOperand(4));
4646 Ops.push_back(Chain);
4648 case NVPTXISD::Suld3DI8Zero:
4649 Opc = NVPTX::SULD_3D_I8_ZERO;
4650 Ops.push_back(TexHandle);
4651 Ops.push_back(N->getOperand(2));
4652 Ops.push_back(N->getOperand(3));
4653 Ops.push_back(N->getOperand(4));
4654 Ops.push_back(Chain);
4656 case NVPTXISD::Suld3DI16Zero:
4657 Opc = NVPTX::SULD_3D_I16_ZERO;
4658 Ops.push_back(TexHandle);
4659 Ops.push_back(N->getOperand(2));
4660 Ops.push_back(N->getOperand(3));
4661 Ops.push_back(N->getOperand(4));
4662 Ops.push_back(Chain);
4664 case NVPTXISD::Suld3DI32Zero:
4665 Opc = NVPTX::SULD_3D_I32_ZERO;
4666 Ops.push_back(TexHandle);
4667 Ops.push_back(N->getOperand(2));
4668 Ops.push_back(N->getOperand(3));
4669 Ops.push_back(N->getOperand(4));
4670 Ops.push_back(Chain);
4672 case NVPTXISD::Suld3DI64Zero:
4673 Opc = NVPTX::SULD_3D_I64_ZERO;
4674 Ops.push_back(TexHandle);
4675 Ops.push_back(N->getOperand(2));
4676 Ops.push_back(N->getOperand(3));
4677 Ops.push_back(N->getOperand(4));
4678 Ops.push_back(Chain);
4680 case NVPTXISD::Suld3DV2I8Zero:
4681 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4682 Ops.push_back(TexHandle);
4683 Ops.push_back(N->getOperand(2));
4684 Ops.push_back(N->getOperand(3));
4685 Ops.push_back(N->getOperand(4));
4686 Ops.push_back(Chain);
4688 case NVPTXISD::Suld3DV2I16Zero:
4689 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4690 Ops.push_back(TexHandle);
4691 Ops.push_back(N->getOperand(2));
4692 Ops.push_back(N->getOperand(3));
4693 Ops.push_back(N->getOperand(4));
4694 Ops.push_back(Chain);
4696 case NVPTXISD::Suld3DV2I32Zero:
4697 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4698 Ops.push_back(TexHandle);
4699 Ops.push_back(N->getOperand(2));
4700 Ops.push_back(N->getOperand(3));
4701 Ops.push_back(N->getOperand(4));
4702 Ops.push_back(Chain);
4704 case NVPTXISD::Suld3DV2I64Zero:
4705 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4706 Ops.push_back(TexHandle);
4707 Ops.push_back(N->getOperand(2));
4708 Ops.push_back(N->getOperand(3));
4709 Ops.push_back(N->getOperand(4));
4710 Ops.push_back(Chain);
4712 case NVPTXISD::Suld3DV4I8Zero:
4713 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4714 Ops.push_back(TexHandle);
4715 Ops.push_back(N->getOperand(2));
4716 Ops.push_back(N->getOperand(3));
4717 Ops.push_back(N->getOperand(4));
4718 Ops.push_back(Chain);
4720 case NVPTXISD::Suld3DV4I16Zero:
4721 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4722 Ops.push_back(TexHandle);
4723 Ops.push_back(N->getOperand(2));
4724 Ops.push_back(N->getOperand(3));
4725 Ops.push_back(N->getOperand(4));
4726 Ops.push_back(Chain);
4728 case NVPTXISD::Suld3DV4I32Zero:
4729 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4730 Ops.push_back(TexHandle);
4731 Ops.push_back(N->getOperand(2));
4732 Ops.push_back(N->getOperand(3));
4733 Ops.push_back(N->getOperand(4));
4734 Ops.push_back(Chain);
4737 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4742 /// SelectBFE - Look for instruction sequences that can be made more efficient
4743 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// The visible code inspects three shapes rooted at \p N:
///   - (and (srl|sra val, C), mask)   where mask passes isMask_64,
///   - (srl|sra (and val, mask), C)   where mask passes isMask_64 or
///                                    isShiftedMask_64,
///   - (srl|sra (shl val, NN), MM)    with constant NN and MM.
/// For each shape it derives a start bit and a length, materialises them as
/// i32 target constants, and finally creates a machine node with one of the
/// NVPTX::BFE_{S,U}{32,64}rii opcodes, keyed on the value type of the
/// extracted operand (i32 or i64).
///
/// NOTE(review): the embedded line numbering has gaps (e.g. 4746 -> 4750), so
/// some statements of this function -- presumably short ones such as local
/// declarations, bail-out returns, bare `else` and closing braces -- are not
/// shown in this listing. The comments below describe only what is visible.
4744 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4745 SDValue LHS = N->getOperand(0);
4746 SDValue RHS = N->getOperand(1);
// Set when the signed bfe form is required (see the SRA handling further
// down); the assignment itself is not visible in this listing.
4750 bool IsSigned = false;
4752 if (N->getOpcode() == ISD::AND) {
4753 // Canonicalize the operands
4754 // We want 'and %val, %mask'
4755 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4756 std::swap(LHS, RHS);
4759 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4761 // We need a constant mask on the RHS of the AND
4765 // Extract the mask bits
4766 uint64_t MaskVal = Mask->getZExtValue();
4767 if (!isMask_64(MaskVal)) {
4768 // We *could* handle shifted masks here, but doing so would require an
4769 // 'and' operation to fix up the low-order bits so we would trade
4770 // shr+and for bfe+and, which has the same throughput
4774 // How many bits are in our mask?
4775 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
// The bit-field length is the number of trailing ones in the mask.
4776 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4778 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4779 // We have a 'srl/and' pair, extract the effective start bit and length
4780 Val = LHS.getNode()->getOperand(0);
4781 Start = LHS.getNode()->getOperand(1);
4782 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4784 uint64_t StartVal = StartConst->getZExtValue();
4785 // How many "good" bits do we have left? "good" is defined here as bits
4786 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of Start (the shift-amount
// operand), not of Val -- confirm that this is the intended bound.
4787 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4788 if (NumBits > GoodBits) {
4789 // Do not handle the case where bits have been shifted in. In theory
4790 // we could handle this, but the cost is likely higher than just
4791 // emitting the srl/and pair.
// Replace the shift-amount operand with an i32 target constant holding the
// same value.
4794 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4796 // Do not handle the case where the shift amount (can be zero if no srl
4797 // was found) is not constant. We could handle this case, but it would
4798 // require run-time logic that would be more expensive than just
4799 // emitting the srl/and pair.
4803 // Do not handle the case where the LHS of the and is not a shift. While
4804 // it would be trivial to handle this case, it would just transform
4805 // 'and' -> 'bfe', but 'and' has higher throughput.
4808 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4809 if (LHS->getOpcode() == ISD::AND) {
4810 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4812 // Shift amount must be constant
4816 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4818 SDValue AndLHS = LHS->getOperand(0);
4819 SDValue AndRHS = LHS->getOperand(1);
4821 // Canonicalize the AND to have the mask on the RHS
4822 if (isa<ConstantSDNode>(AndLHS)) {
4823 std::swap(AndLHS, AndRHS);
4826 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4828 // Mask must be constant
4832 uint64_t MaskVal = MaskCnst->getZExtValue();
4835 if (isMask_64(MaskVal)) {
4837 // The number of bits in the result bitfield will be the number of
4838 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): ShiftAmt is unsigned and no visible check keeps it below
// the number of trailing ones -- confirm this cannot wrap around.
4839 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
4840 } else if (isShiftedMask_64(MaskVal)) {
4841 NumZeros = countTrailingZeros(MaskVal);
4842 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
4843 // The number of bits in the result bitfield will be the number of
4844 // trailing zeros plus the number of set bits in the mask minus the
4845 // number of bits we shift off
// NOTE(review): same unsigned-subtraction caveat as above -- confirm.
4846 NumBits = NumZeros + NumOnes - ShiftAmt;
4848 // This is not a mask we can handle
4852 if (ShiftAmt < NumZeros) {
4853 // Handling this case would require extra logic that would make this
4854 // transformation non-profitable
// The extraction starts at the shift amount and spans NumBits bits.
4859 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4860 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4861 } else if (LHS->getOpcode() == ISD::SHL) {
4862 // Here, we have a pattern like:
4864 // (sra (shl val, NN), MM)
4866 // (srl (shl val, NN), MM)
4868 // If MM >= NN, we can efficiently optimize this with bfe
4869 Val = LHS->getOperand(0);
4871 SDValue ShlRHS = LHS->getOperand(1);
4872 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4874 // Shift amount must be constant
4877 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4879 SDValue ShrRHS = RHS;
4880 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4882 // Shift amount must be constant
4885 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4887 // To avoid extra codegen and be profitable, we need Outer >= Inner
4888 if (OuterShiftAmt < InnerShiftAmt) {
4892 // If the outer shift is greater than or equal to the type size, we have no
4893 // bitfield to extract (since we also check that the inner shift is <= the
4894 // outer shift then this also implies that the inner shift is < the type size)
4895 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// The two constants built next are (outer - inner) and (type size - outer);
// their assignment targets are on lines not shown here -- presumably Start
// and Len respectively, TODO confirm.
4900 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4902 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4903 OuterShiftAmt, MVT::i32);
4905 if (N->getOpcode() == ISD::SRA) {
4906 // If we have an arithmetic right shift, we need to use the signed bfe
4921 // For the BFE operations we form here from "and" and "srl", always use the
4922 // unsigned variants.
// The opcode is keyed on the type of the extracted value; which of the
// signed/unsigned forms is taken is decided on lines not shown here
// (presumably by IsSigned).
4923 if (Val.getValueType() == MVT::i32) {
4925 Opc = NVPTX::BFE_S32rii;
4927 Opc = NVPTX::BFE_U32rii;
4929 } else if (Val.getValueType() == MVT::i64) {
4931 Opc = NVPTX::BFE_S64rii;
4933 Opc = NVPTX::BFE_U64rii;
4936 // We cannot handle this type
// Build the replacement machine node with the same result types as N.
4945 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4950 // SelectDirectAddr - Match a direct address for DAG.
4951 // A direct address could be a globaladdress or externalsymbol.
//
// N is the candidate address node; Address is an output that receives the
// matched address. Besides target global addresses / external symbols, the
// visible code also looks through an NVPTXISD::Wrapper node and through the
// nvvm_ptr_gen_to_param intrinsic applied to an NVPTXISD::MoveParam.
//
// NOTE(review): the embedded line numbering has gaps (e.g. 4956-4958), so
// some statements -- presumably the short assignments/returns that finish
// each branch -- are not shown in this listing.
4952 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4953 // Return true if TGA or ES.
4954 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4955 N.getOpcode() == ISD::TargetExternalSymbol) {
// A Wrapper node yields its first operand as the address.
4959 if (N.getOpcode() == NVPTXISD::Wrapper) {
4960 Address = N.getOperand(0);
// For a chainless intrinsic, operand 0 carries the intrinsic ID. Only
// nvvm_ptr_gen_to_param whose argument is a MoveParam is followed, by
// recursing on the MoveParam's first operand.
4963 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
4964 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4965 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4966 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4967 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
/// SelectADDRsi_imp - Match an address of the form
/// (add <direct address>, <constant>).
///
/// When \p Addr is an ISD::ADD whose second operand is a constant and whose
/// first operand is accepted by SelectDirectAddr, \p Base receives the direct
/// address and \p Offset receives the constant as a target constant of type
/// \p mvt. \p OpNode is not referenced by the visible statements.
///
/// NOTE(review): the return statements and closing braces of this function
/// are not shown in this listing, so the exact result for each path cannot be
/// confirmed from here.
4973 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4974 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4975 if (Addr.getOpcode() == ISD::ADD) {
4976 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4977 SDValue base = Addr.getOperand(0);
// SelectDirectAddr writes the matched address straight into Base.
4978 if (SelectDirectAddr(base, Base)) {
4979 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
/// SelectADDRsi - Forwards to SelectADDRsi_imp with MVT::i32 as the type used
/// for the offset constant; returns that call's result unchanged.
4988 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4989 SDValue &Base, SDValue &Offset) {
4990 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
/// SelectADDRsi64 - Forwards to SelectADDRsi_imp with MVT::i64 as the type
/// used for the offset constant; returns that call's result unchanged.
4994 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4995 SDValue &Base, SDValue &Offset) {
4996 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
/// SelectADDRri_imp - Match a base-plus-constant-offset address and produce
/// its \p Base and \p Offset operands, using \p mvt as their type.
///
/// Cases handled by the visible code:
///   - \p Addr is a frame index: Base is the target frame index, Offset is 0.
///   - \p Addr is a target external symbol or target global address: the
///     match is rejected (returns false).
///   - \p Addr is an ISD::ADD: operand 0 is first tested with
///     SelectDirectAddr; otherwise, when operand 1 is a constant, Base is
///     taken from operand 0 (as a target frame index if it is a frame index)
///     and Offset is the constant.
/// \p OpNode is not referenced by the visible statements.
///
/// NOTE(review): several short lines (returns, `else`, closing braces) are
/// not shown in this listing; the result of each path other than the
/// explicit `return false` cannot be confirmed from here.
5000 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5001 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5002 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5003 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5004 Offset = CurDAG->getTargetConstant(0, mvt);
5007 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5008 Addr.getOpcode() == ISD::TargetGlobalAddress)
5009 return false; // direct calls.
5011 if (Addr.getOpcode() == ISD::ADD) {
// NOTE(review): Addr (this function's by-value parameter) is passed as the
// output argument, so a successful match overwrites the local Addr; what
// follows inside this branch is not visible here.
5012 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5015 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5016 if (FrameIndexSDNode *FIN =
5017 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5018 // Constant offset from frame ref.
5019 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// Presumably the `else` arm of the frame-index test above (the `else` line
// itself is not shown): use operand 0 directly as the base.
5021 Base = Addr.getOperand(0);
5022 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
/// SelectADDRri - Forwards to SelectADDRri_imp with MVT::i32 as the type used
/// for the base/offset operands; returns that call's result unchanged.
5030 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5031 SDValue &Base, SDValue &Offset) {
5032 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
/// SelectADDRri64 - Forwards to SelectADDRri_imp with MVT::i64 as the type
/// used for the base/offset operands; returns that call's result unchanged.
5036 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5037 SDValue &Base, SDValue &Offset) {
5038 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
/// ChkMemSDNodeAddressSpace - Check whether the memory access performed by
/// \p N refers to address space \p spN.
///
/// For a MemSDNode, the IR value attached to its memory operand is fetched
/// and, if that value has pointer type, the result is whether the pointer's
/// address space equals \p spN. A memory operand carrying a pseudo value is
/// special-cased when \p spN is 0.
///
/// NOTE(review): the statement taken in the pseudo-value case, any null check
/// on Src, and the fall-through return are on lines not shown in this
/// listing.
5041 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5042 unsigned int spN) const {
5043 const Value *Src = nullptr;
5044 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5045 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5047 Src = mN->getMemOperand()->getValue();
// NOTE(review): Src starts out as nullptr and is dereferenced here; confirm
// that a guard exists on the lines not shown.
5051 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5052 return (PT->getAddressSpace() == spN);
5056 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5057 /// inline asm expressions.
5058 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5059 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
5061 switch (ConstraintCode) {
5065 if (SelectDirectAddr(Op, Op0)) {
5066 OutOps.push_back(Op0);
5067 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5070 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5071 OutOps.push_back(Op0);
5072 OutOps.push_back(Op1);