1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
25 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line options that tune f32 code generation. The selector
// helpers in this file query getNumOccurrences() on each of them, so a value
// given explicitly on the command line is always honored.
//
// -nvptx-prec-divf32: picks the f32 division flavour (see description text).
// NOTE(review): this description starts with "NVPTX Specifies" while the two
// sibling options use "NVPTX Specific" - looks like a typo in the help text;
// confirm before changing the runtime string.
27 static cl::opt<int> UsePrecDivF32(
28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
30 " IEEE Compliant F32 div.rnd if available."),
// -nvptx-prec-sqrtf32: picks between sqrt.approx and sqrt.rn.
34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// -nvptx-f32ftz: requests flushing of f32 subnormals to sign-preserving zero.
39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
45 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// \param TM        target machine handed to the selector's constructor.
/// \param OptLevel  optimization level handed to the selector's constructor.
/// \returns a heap-allocated NVPTXDAGToDAGISel as a FunctionPass pointer.
///          NOTE(review): ownership presumably passes to the caller / pass
///          manager - confirm.
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
47 llvm::CodeGenOpt::Level OptLevel) {
48 return new NVPTXDAGToDAGISel(TM, OptLevel);
// Constructs the NVPTX selector. The target machine and optimization level
// are forwarded unchanged to the SelectionDAGISel base class.
51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(tm, OptLevel) {
// doMulWide is enabled for every optimization level above zero.
54 doMulWide = (OptLevel > 0);
// Per-function entry point: records the NVPTX subtarget of MF in the
// Subtarget member (the selection helpers read it), then defers to the base
// class implementation and returns its result.
57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
59 return SelectionDAGISel::runOnMachineFunction(MF);
// Chooses the f32 division level. An explicit -nvptx-prec-divf32 on the
// command line takes priority; otherwise the decision depends on the
// UnsafeFPMath target option.
62 int NVPTXDAGToDAGISel::getDivF32Level() const {
63 if (UsePrecDivF32.getNumOccurrences() > 0) {
64 // If nvptx-prec-divf32=N is used on the command-line, always honor it
67 // Otherwise, use div.approx if fast math is enabled
68 if (TM.Options.UnsafeFPMath)
// Reports whether the precise f32 square root should be used. An explicit
// -nvptx-prec-sqrtf32 on the command line is returned as-is; otherwise the
// decision depends on the UnsafeFPMath target option.
75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
76 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
78 return UsePrecSqrtF32;
80 // Otherwise, use sqrt.approx if fast math is enabled
81 if (TM.Options.UnsafeFPMath)
// Reports whether f32 flush-to-zero should be used for the current function.
// The -nvptx-f32ftz command-line option is checked first; after that the
// "nvptx-f32ftz" function attribute is consulted and counts as enabled only
// when its string value is exactly "true".
88 bool NVPTXDAGToDAGISel::useF32FTZ() const {
89 if (FtzEnabled.getNumOccurrences() > 0) {
90 // If nvptx-f32ftz is used on the command-line, always honor it
93 const Function *F = MF->getFunction();
94 // Otherwise, check for an nvptx-f32ftz attribute on the function
95 if (F->hasFnAttribute("nvptx-f32ftz"))
96 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
98 .getValueAsString() == "true");
// Reports whether FMA may be used; the decision is delegated to the
// subtarget's NVPTXTargetLowering, given the current machine function and
// optimization level.
104 bool NVPTXDAGToDAGISel::allowFMA() const {
105 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
106 return TL->allowFMA(*MF, OptLevel);
109 /// Select - Select instructions not customized! Used for
110 /// expanded, promoted and normal instructions.
///
/// Nodes that already carry a machine opcode are left alone (nullptr is
/// returned for them). Every other node is dispatched on its opcode to one of
/// the NVPTX-specific Select* helpers, whose result is kept in ResNode.
/// SelectCode(N) is the generic fallback.
/// NOTE(review): presumably a non-null ResNode is returned before the
/// SelectCode fallback is reached - confirm.
111 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
113 if (N->isMachineOpcode()) {
115 return nullptr; // Already selected.
118 SDNode *ResNode = nullptr;
119 switch (N->getOpcode()) {
// Scalar loads and stores.
121 ResNode = SelectLoad(N);
124 ResNode = SelectStore(N);
// Vector loads.
126 case NVPTXISD::LoadV2:
127 case NVPTXISD::LoadV4:
128 ResNode = SelectLoadVector(N);
// LDG / LDU vector nodes.
130 case NVPTXISD::LDGV2:
131 case NVPTXISD::LDGV4:
132 case NVPTXISD::LDUV2:
133 case NVPTXISD::LDUV4:
134 ResNode = SelectLDGLDU(N);
// Vector stores.
136 case NVPTXISD::StoreV2:
137 case NVPTXISD::StoreV4:
138 ResNode = SelectStoreVector(N);
// Parameter loads, return-value stores and parameter stores.
140 case NVPTXISD::LoadParam:
141 case NVPTXISD::LoadParamV2:
142 case NVPTXISD::LoadParamV4:
143 ResNode = SelectLoadParam(N);
145 case NVPTXISD::StoreRetval:
146 case NVPTXISD::StoreRetvalV2:
147 case NVPTXISD::StoreRetvalV4:
148 ResNode = SelectStoreRetval(N);
150 case NVPTXISD::StoreParam:
151 case NVPTXISD::StoreParamV2:
152 case NVPTXISD::StoreParamV4:
153 case NVPTXISD::StoreParamS32:
154 case NVPTXISD::StoreParamU32:
155 ResNode = SelectStoreParam(N);
// Intrinsics, without and with a chain operand.
157 case ISD::INTRINSIC_WO_CHAIN:
158 ResNode = SelectIntrinsicNoChain(N);
160 case ISD::INTRINSIC_W_CHAIN:
161 ResNode = SelectIntrinsicChain(N);
// All Tex*, Tld4*, TexUnified* and Tld4Unified* nodes share one handler,
// SelectTextureIntrinsic.
163 case NVPTXISD::Tex1DFloatS32:
164 case NVPTXISD::Tex1DFloatFloat:
165 case NVPTXISD::Tex1DFloatFloatLevel:
166 case NVPTXISD::Tex1DFloatFloatGrad:
167 case NVPTXISD::Tex1DS32S32:
168 case NVPTXISD::Tex1DS32Float:
169 case NVPTXISD::Tex1DS32FloatLevel:
170 case NVPTXISD::Tex1DS32FloatGrad:
171 case NVPTXISD::Tex1DU32S32:
172 case NVPTXISD::Tex1DU32Float:
173 case NVPTXISD::Tex1DU32FloatLevel:
174 case NVPTXISD::Tex1DU32FloatGrad:
175 case NVPTXISD::Tex1DArrayFloatS32:
176 case NVPTXISD::Tex1DArrayFloatFloat:
177 case NVPTXISD::Tex1DArrayFloatFloatLevel:
178 case NVPTXISD::Tex1DArrayFloatFloatGrad:
179 case NVPTXISD::Tex1DArrayS32S32:
180 case NVPTXISD::Tex1DArrayS32Float:
181 case NVPTXISD::Tex1DArrayS32FloatLevel:
182 case NVPTXISD::Tex1DArrayS32FloatGrad:
183 case NVPTXISD::Tex1DArrayU32S32:
184 case NVPTXISD::Tex1DArrayU32Float:
185 case NVPTXISD::Tex1DArrayU32FloatLevel:
186 case NVPTXISD::Tex1DArrayU32FloatGrad:
187 case NVPTXISD::Tex2DFloatS32:
188 case NVPTXISD::Tex2DFloatFloat:
189 case NVPTXISD::Tex2DFloatFloatLevel:
190 case NVPTXISD::Tex2DFloatFloatGrad:
191 case NVPTXISD::Tex2DS32S32:
192 case NVPTXISD::Tex2DS32Float:
193 case NVPTXISD::Tex2DS32FloatLevel:
194 case NVPTXISD::Tex2DS32FloatGrad:
195 case NVPTXISD::Tex2DU32S32:
196 case NVPTXISD::Tex2DU32Float:
197 case NVPTXISD::Tex2DU32FloatLevel:
198 case NVPTXISD::Tex2DU32FloatGrad:
199 case NVPTXISD::Tex2DArrayFloatS32:
200 case NVPTXISD::Tex2DArrayFloatFloat:
201 case NVPTXISD::Tex2DArrayFloatFloatLevel:
202 case NVPTXISD::Tex2DArrayFloatFloatGrad:
203 case NVPTXISD::Tex2DArrayS32S32:
204 case NVPTXISD::Tex2DArrayS32Float:
205 case NVPTXISD::Tex2DArrayS32FloatLevel:
206 case NVPTXISD::Tex2DArrayS32FloatGrad:
207 case NVPTXISD::Tex2DArrayU32S32:
208 case NVPTXISD::Tex2DArrayU32Float:
209 case NVPTXISD::Tex2DArrayU32FloatLevel:
210 case NVPTXISD::Tex2DArrayU32FloatGrad:
211 case NVPTXISD::Tex3DFloatS32:
212 case NVPTXISD::Tex3DFloatFloat:
213 case NVPTXISD::Tex3DFloatFloatLevel:
214 case NVPTXISD::Tex3DFloatFloatGrad:
215 case NVPTXISD::Tex3DS32S32:
216 case NVPTXISD::Tex3DS32Float:
217 case NVPTXISD::Tex3DS32FloatLevel:
218 case NVPTXISD::Tex3DS32FloatGrad:
219 case NVPTXISD::Tex3DU32S32:
220 case NVPTXISD::Tex3DU32Float:
221 case NVPTXISD::Tex3DU32FloatLevel:
222 case NVPTXISD::Tex3DU32FloatGrad:
223 case NVPTXISD::TexCubeFloatFloat:
224 case NVPTXISD::TexCubeFloatFloatLevel:
225 case NVPTXISD::TexCubeS32Float:
226 case NVPTXISD::TexCubeS32FloatLevel:
227 case NVPTXISD::TexCubeU32Float:
228 case NVPTXISD::TexCubeU32FloatLevel:
229 case NVPTXISD::TexCubeArrayFloatFloat:
230 case NVPTXISD::TexCubeArrayFloatFloatLevel:
231 case NVPTXISD::TexCubeArrayS32Float:
232 case NVPTXISD::TexCubeArrayS32FloatLevel:
233 case NVPTXISD::TexCubeArrayU32Float:
234 case NVPTXISD::TexCubeArrayU32FloatLevel:
235 case NVPTXISD::Tld4R2DFloatFloat:
236 case NVPTXISD::Tld4G2DFloatFloat:
237 case NVPTXISD::Tld4B2DFloatFloat:
238 case NVPTXISD::Tld4A2DFloatFloat:
239 case NVPTXISD::Tld4R2DS64Float:
240 case NVPTXISD::Tld4G2DS64Float:
241 case NVPTXISD::Tld4B2DS64Float:
242 case NVPTXISD::Tld4A2DS64Float:
243 case NVPTXISD::Tld4R2DU64Float:
244 case NVPTXISD::Tld4G2DU64Float:
245 case NVPTXISD::Tld4B2DU64Float:
246 case NVPTXISD::Tld4A2DU64Float:
247 case NVPTXISD::TexUnified1DFloatS32:
248 case NVPTXISD::TexUnified1DFloatFloat:
249 case NVPTXISD::TexUnified1DFloatFloatLevel:
250 case NVPTXISD::TexUnified1DFloatFloatGrad:
251 case NVPTXISD::TexUnified1DS32S32:
252 case NVPTXISD::TexUnified1DS32Float:
253 case NVPTXISD::TexUnified1DS32FloatLevel:
254 case NVPTXISD::TexUnified1DS32FloatGrad:
255 case NVPTXISD::TexUnified1DU32S32:
256 case NVPTXISD::TexUnified1DU32Float:
257 case NVPTXISD::TexUnified1DU32FloatLevel:
258 case NVPTXISD::TexUnified1DU32FloatGrad:
259 case NVPTXISD::TexUnified1DArrayFloatS32:
260 case NVPTXISD::TexUnified1DArrayFloatFloat:
261 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
262 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
263 case NVPTXISD::TexUnified1DArrayS32S32:
264 case NVPTXISD::TexUnified1DArrayS32Float:
265 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
266 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
267 case NVPTXISD::TexUnified1DArrayU32S32:
268 case NVPTXISD::TexUnified1DArrayU32Float:
269 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
270 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
271 case NVPTXISD::TexUnified2DFloatS32:
272 case NVPTXISD::TexUnified2DFloatFloat:
273 case NVPTXISD::TexUnified2DFloatFloatLevel:
274 case NVPTXISD::TexUnified2DFloatFloatGrad:
275 case NVPTXISD::TexUnified2DS32S32:
276 case NVPTXISD::TexUnified2DS32Float:
277 case NVPTXISD::TexUnified2DS32FloatLevel:
278 case NVPTXISD::TexUnified2DS32FloatGrad:
279 case NVPTXISD::TexUnified2DU32S32:
280 case NVPTXISD::TexUnified2DU32Float:
281 case NVPTXISD::TexUnified2DU32FloatLevel:
282 case NVPTXISD::TexUnified2DU32FloatGrad:
283 case NVPTXISD::TexUnified2DArrayFloatS32:
284 case NVPTXISD::TexUnified2DArrayFloatFloat:
285 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
286 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
287 case NVPTXISD::TexUnified2DArrayS32S32:
288 case NVPTXISD::TexUnified2DArrayS32Float:
289 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
290 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
291 case NVPTXISD::TexUnified2DArrayU32S32:
292 case NVPTXISD::TexUnified2DArrayU32Float:
293 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
294 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
295 case NVPTXISD::TexUnified3DFloatS32:
296 case NVPTXISD::TexUnified3DFloatFloat:
297 case NVPTXISD::TexUnified3DFloatFloatLevel:
298 case NVPTXISD::TexUnified3DFloatFloatGrad:
299 case NVPTXISD::TexUnified3DS32S32:
300 case NVPTXISD::TexUnified3DS32Float:
301 case NVPTXISD::TexUnified3DS32FloatLevel:
302 case NVPTXISD::TexUnified3DS32FloatGrad:
303 case NVPTXISD::TexUnified3DU32S32:
304 case NVPTXISD::TexUnified3DU32Float:
305 case NVPTXISD::TexUnified3DU32FloatLevel:
306 case NVPTXISD::TexUnified3DU32FloatGrad:
307 case NVPTXISD::TexUnifiedCubeFloatFloat:
308 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
309 case NVPTXISD::TexUnifiedCubeS32Float:
310 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
311 case NVPTXISD::TexUnifiedCubeU32Float:
312 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
313 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
314 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
315 case NVPTXISD::TexUnifiedCubeArrayS32Float:
316 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
317 case NVPTXISD::TexUnifiedCubeArrayU32Float:
318 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
319 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
320 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
321 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
322 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
323 case NVPTXISD::Tld4UnifiedR2DS64Float:
324 case NVPTXISD::Tld4UnifiedG2DS64Float:
325 case NVPTXISD::Tld4UnifiedB2DS64Float:
326 case NVPTXISD::Tld4UnifiedA2DS64Float:
327 case NVPTXISD::Tld4UnifiedR2DU64Float:
328 case NVPTXISD::Tld4UnifiedG2DU64Float:
329 case NVPTXISD::Tld4UnifiedB2DU64Float:
330 case NVPTXISD::Tld4UnifiedA2DU64Float:
331 ResNode = SelectTextureIntrinsic(N);
// All Suld* nodes (Clamp, Trap and Zero suffixes) share one handler,
// SelectSurfaceIntrinsic.
333 case NVPTXISD::Suld1DI8Clamp:
334 case NVPTXISD::Suld1DI16Clamp:
335 case NVPTXISD::Suld1DI32Clamp:
336 case NVPTXISD::Suld1DI64Clamp:
337 case NVPTXISD::Suld1DV2I8Clamp:
338 case NVPTXISD::Suld1DV2I16Clamp:
339 case NVPTXISD::Suld1DV2I32Clamp:
340 case NVPTXISD::Suld1DV2I64Clamp:
341 case NVPTXISD::Suld1DV4I8Clamp:
342 case NVPTXISD::Suld1DV4I16Clamp:
343 case NVPTXISD::Suld1DV4I32Clamp:
344 case NVPTXISD::Suld1DArrayI8Clamp:
345 case NVPTXISD::Suld1DArrayI16Clamp:
346 case NVPTXISD::Suld1DArrayI32Clamp:
347 case NVPTXISD::Suld1DArrayI64Clamp:
348 case NVPTXISD::Suld1DArrayV2I8Clamp:
349 case NVPTXISD::Suld1DArrayV2I16Clamp:
350 case NVPTXISD::Suld1DArrayV2I32Clamp:
351 case NVPTXISD::Suld1DArrayV2I64Clamp:
352 case NVPTXISD::Suld1DArrayV4I8Clamp:
353 case NVPTXISD::Suld1DArrayV4I16Clamp:
354 case NVPTXISD::Suld1DArrayV4I32Clamp:
355 case NVPTXISD::Suld2DI8Clamp:
356 case NVPTXISD::Suld2DI16Clamp:
357 case NVPTXISD::Suld2DI32Clamp:
358 case NVPTXISD::Suld2DI64Clamp:
359 case NVPTXISD::Suld2DV2I8Clamp:
360 case NVPTXISD::Suld2DV2I16Clamp:
361 case NVPTXISD::Suld2DV2I32Clamp:
362 case NVPTXISD::Suld2DV2I64Clamp:
363 case NVPTXISD::Suld2DV4I8Clamp:
364 case NVPTXISD::Suld2DV4I16Clamp:
365 case NVPTXISD::Suld2DV4I32Clamp:
366 case NVPTXISD::Suld2DArrayI8Clamp:
367 case NVPTXISD::Suld2DArrayI16Clamp:
368 case NVPTXISD::Suld2DArrayI32Clamp:
369 case NVPTXISD::Suld2DArrayI64Clamp:
370 case NVPTXISD::Suld2DArrayV2I8Clamp:
371 case NVPTXISD::Suld2DArrayV2I16Clamp:
372 case NVPTXISD::Suld2DArrayV2I32Clamp:
373 case NVPTXISD::Suld2DArrayV2I64Clamp:
374 case NVPTXISD::Suld2DArrayV4I8Clamp:
375 case NVPTXISD::Suld2DArrayV4I16Clamp:
376 case NVPTXISD::Suld2DArrayV4I32Clamp:
377 case NVPTXISD::Suld3DI8Clamp:
378 case NVPTXISD::Suld3DI16Clamp:
379 case NVPTXISD::Suld3DI32Clamp:
380 case NVPTXISD::Suld3DI64Clamp:
381 case NVPTXISD::Suld3DV2I8Clamp:
382 case NVPTXISD::Suld3DV2I16Clamp:
383 case NVPTXISD::Suld3DV2I32Clamp:
384 case NVPTXISD::Suld3DV2I64Clamp:
385 case NVPTXISD::Suld3DV4I8Clamp:
386 case NVPTXISD::Suld3DV4I16Clamp:
387 case NVPTXISD::Suld3DV4I32Clamp:
388 case NVPTXISD::Suld1DI8Trap:
389 case NVPTXISD::Suld1DI16Trap:
390 case NVPTXISD::Suld1DI32Trap:
391 case NVPTXISD::Suld1DI64Trap:
392 case NVPTXISD::Suld1DV2I8Trap:
393 case NVPTXISD::Suld1DV2I16Trap:
394 case NVPTXISD::Suld1DV2I32Trap:
395 case NVPTXISD::Suld1DV2I64Trap:
396 case NVPTXISD::Suld1DV4I8Trap:
397 case NVPTXISD::Suld1DV4I16Trap:
398 case NVPTXISD::Suld1DV4I32Trap:
399 case NVPTXISD::Suld1DArrayI8Trap:
400 case NVPTXISD::Suld1DArrayI16Trap:
401 case NVPTXISD::Suld1DArrayI32Trap:
402 case NVPTXISD::Suld1DArrayI64Trap:
403 case NVPTXISD::Suld1DArrayV2I8Trap:
404 case NVPTXISD::Suld1DArrayV2I16Trap:
405 case NVPTXISD::Suld1DArrayV2I32Trap:
406 case NVPTXISD::Suld1DArrayV2I64Trap:
407 case NVPTXISD::Suld1DArrayV4I8Trap:
408 case NVPTXISD::Suld1DArrayV4I16Trap:
409 case NVPTXISD::Suld1DArrayV4I32Trap:
410 case NVPTXISD::Suld2DI8Trap:
411 case NVPTXISD::Suld2DI16Trap:
412 case NVPTXISD::Suld2DI32Trap:
413 case NVPTXISD::Suld2DI64Trap:
414 case NVPTXISD::Suld2DV2I8Trap:
415 case NVPTXISD::Suld2DV2I16Trap:
416 case NVPTXISD::Suld2DV2I32Trap:
417 case NVPTXISD::Suld2DV2I64Trap:
418 case NVPTXISD::Suld2DV4I8Trap:
419 case NVPTXISD::Suld2DV4I16Trap:
420 case NVPTXISD::Suld2DV4I32Trap:
421 case NVPTXISD::Suld2DArrayI8Trap:
422 case NVPTXISD::Suld2DArrayI16Trap:
423 case NVPTXISD::Suld2DArrayI32Trap:
424 case NVPTXISD::Suld2DArrayI64Trap:
425 case NVPTXISD::Suld2DArrayV2I8Trap:
426 case NVPTXISD::Suld2DArrayV2I16Trap:
427 case NVPTXISD::Suld2DArrayV2I32Trap:
428 case NVPTXISD::Suld2DArrayV2I64Trap:
429 case NVPTXISD::Suld2DArrayV4I8Trap:
430 case NVPTXISD::Suld2DArrayV4I16Trap:
431 case NVPTXISD::Suld2DArrayV4I32Trap:
432 case NVPTXISD::Suld3DI8Trap:
433 case NVPTXISD::Suld3DI16Trap:
434 case NVPTXISD::Suld3DI32Trap:
435 case NVPTXISD::Suld3DI64Trap:
436 case NVPTXISD::Suld3DV2I8Trap:
437 case NVPTXISD::Suld3DV2I16Trap:
438 case NVPTXISD::Suld3DV2I32Trap:
439 case NVPTXISD::Suld3DV2I64Trap:
440 case NVPTXISD::Suld3DV4I8Trap:
441 case NVPTXISD::Suld3DV4I16Trap:
442 case NVPTXISD::Suld3DV4I32Trap:
443 case NVPTXISD::Suld1DI8Zero:
444 case NVPTXISD::Suld1DI16Zero:
445 case NVPTXISD::Suld1DI32Zero:
446 case NVPTXISD::Suld1DI64Zero:
447 case NVPTXISD::Suld1DV2I8Zero:
448 case NVPTXISD::Suld1DV2I16Zero:
449 case NVPTXISD::Suld1DV2I32Zero:
450 case NVPTXISD::Suld1DV2I64Zero:
451 case NVPTXISD::Suld1DV4I8Zero:
452 case NVPTXISD::Suld1DV4I16Zero:
453 case NVPTXISD::Suld1DV4I32Zero:
454 case NVPTXISD::Suld1DArrayI8Zero:
455 case NVPTXISD::Suld1DArrayI16Zero:
456 case NVPTXISD::Suld1DArrayI32Zero:
457 case NVPTXISD::Suld1DArrayI64Zero:
458 case NVPTXISD::Suld1DArrayV2I8Zero:
459 case NVPTXISD::Suld1DArrayV2I16Zero:
460 case NVPTXISD::Suld1DArrayV2I32Zero:
461 case NVPTXISD::Suld1DArrayV2I64Zero:
462 case NVPTXISD::Suld1DArrayV4I8Zero:
463 case NVPTXISD::Suld1DArrayV4I16Zero:
464 case NVPTXISD::Suld1DArrayV4I32Zero:
465 case NVPTXISD::Suld2DI8Zero:
466 case NVPTXISD::Suld2DI16Zero:
467 case NVPTXISD::Suld2DI32Zero:
468 case NVPTXISD::Suld2DI64Zero:
469 case NVPTXISD::Suld2DV2I8Zero:
470 case NVPTXISD::Suld2DV2I16Zero:
471 case NVPTXISD::Suld2DV2I32Zero:
472 case NVPTXISD::Suld2DV2I64Zero:
473 case NVPTXISD::Suld2DV4I8Zero:
474 case NVPTXISD::Suld2DV4I16Zero:
475 case NVPTXISD::Suld2DV4I32Zero:
476 case NVPTXISD::Suld2DArrayI8Zero:
477 case NVPTXISD::Suld2DArrayI16Zero:
478 case NVPTXISD::Suld2DArrayI32Zero:
479 case NVPTXISD::Suld2DArrayI64Zero:
480 case NVPTXISD::Suld2DArrayV2I8Zero:
481 case NVPTXISD::Suld2DArrayV2I16Zero:
482 case NVPTXISD::Suld2DArrayV2I32Zero:
483 case NVPTXISD::Suld2DArrayV2I64Zero:
484 case NVPTXISD::Suld2DArrayV4I8Zero:
485 case NVPTXISD::Suld2DArrayV4I16Zero:
486 case NVPTXISD::Suld2DArrayV4I32Zero:
487 case NVPTXISD::Suld3DI8Zero:
488 case NVPTXISD::Suld3DI16Zero:
489 case NVPTXISD::Suld3DI32Zero:
490 case NVPTXISD::Suld3DI64Zero:
491 case NVPTXISD::Suld3DV2I8Zero:
492 case NVPTXISD::Suld3DV2I16Zero:
493 case NVPTXISD::Suld3DV2I32Zero:
494 case NVPTXISD::Suld3DV2I64Zero:
495 case NVPTXISD::Suld3DV4I8Zero:
496 case NVPTXISD::Suld3DV4I16Zero:
497 case NVPTXISD::Suld3DV4I32Zero:
498 ResNode = SelectSurfaceIntrinsic(N);
// Bit-field extract handling.
504 ResNode = SelectBFE(N);
// Address space casts get their own cvta-based selection.
506 case ISD::ADDRSPACECAST:
507 ResNode = SelectAddrSpaceCast(N);
// Generic fallback selection.
514 return SelectCode(N);
// Selects intrinsic nodes that carry a chain. The intrinsic ID is read from
// operand 1; the nvvm ldg/ldu global intrinsics are forwarded to SelectLDGLDU.
517 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
518 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
522 case Intrinsic::nvvm_ldg_global_f:
523 case Intrinsic::nvvm_ldg_global_i:
524 case Intrinsic::nvvm_ldg_global_p:
525 case Intrinsic::nvvm_ldu_global_f:
526 case Intrinsic::nvvm_ldu_global_i:
527 case Intrinsic::nvvm_ldu_global_p:
528 return SelectLDGLDU(N);
// Translates the address space of a memory node into the matching
// NVPTX::PTXLdStInstCode value. The address space is taken from the pointer
// type of the IR value attached to the node's memory operand; GENERIC is the
// result whenever no more specific answer is found.
// NOTE(review): the Subtarget parameter is not referenced by the statements
// shown here - confirm whether it is still needed.
532 static unsigned int getCodeAddrSpace(MemSDNode *N,
533 const NVPTXSubtarget *Subtarget) {
534 const Value *Src = N->getMemOperand()->getValue();
537 return NVPTX::PTXLdStInstCode::GENERIC;
539 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
540 switch (PT->getAddressSpace()) {
541 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
542 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
543 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
544 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
545 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
546 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fallback when the value is not a pointer or its address space is not
// listed above.
550 return NVPTX::PTXLdStInstCode::GENERIC;
// Selects intrinsic nodes without a chain. The intrinsic ID is read from
// operand 0; nvvm_texsurf_handle_internal is forwarded to SelectTexSurfHandle.
553 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
554 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
558 case Intrinsic::nvvm_texsurf_handle_internal:
559 return SelectTexSurfHandle(N);
// Builds an NVPTX::texsurf_handles machine node (i64 result) for a
// texture/surface handle intrinsic.
563 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
564 // Op 0 is the intrinsic ID
// Operand 1 is a wrapper node; its first operand is taken as the global
// value.
565 SDValue Wrapper = N->getOperand(1);
566 SDValue GlobalVal = Wrapper.getOperand(0);
567 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
// Selects an ISD::ADDRSPACECAST node as one of the cvta machine
// instructions. Casts into the generic address space use the cvta_* opcodes,
// casts out of it use the cvta_to_* opcodes; in both directions the _64
// opcode is chosen when the subtarget is 64-bit. A cast between two
// non-generic address spaces, or involving an address space not listed in the
// switches, is a fatal error.
571 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
572 SDValue Src = N->getOperand(0);
573 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
574 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
575 unsigned DstAddrSpace = CastN->getDestAddressSpace();
577 assert(SrcAddrSpace != DstAddrSpace &&
578 "addrspacecast must be between different address spaces");
580 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
581 // Specific to generic
583 switch (SrcAddrSpace) {
584 default: report_fatal_error("Bad address space in addrspacecast");
585 case ADDRESS_SPACE_GLOBAL:
586 Opc = Subtarget->is64Bit() ? NVPTX::cvta_global_yes_64
587 : NVPTX::cvta_global_yes;
589 case ADDRESS_SPACE_SHARED:
590 Opc = Subtarget->is64Bit() ? NVPTX::cvta_shared_yes_64
591 : NVPTX::cvta_shared_yes;
593 case ADDRESS_SPACE_CONST:
594 Opc = Subtarget->is64Bit() ? NVPTX::cvta_const_yes_64
595 : NVPTX::cvta_const_yes;
597 case ADDRESS_SPACE_LOCAL:
598 Opc = Subtarget->is64Bit() ? NVPTX::cvta_local_yes_64
599 : NVPTX::cvta_local_yes;
// The machine node keeps the value type of the original cast.
602 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
604 // Generic to specific
// The literal 0 is compared against the source address space here; per the
// error message it stands for the generic address space.
605 if (SrcAddrSpace != 0)
606 report_fatal_error("Cannot cast between two non-generic address spaces");
608 switch (DstAddrSpace) {
609 default: report_fatal_error("Bad address space in addrspacecast");
610 case ADDRESS_SPACE_GLOBAL:
611 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_global_yes_64
612 : NVPTX::cvta_to_global_yes;
614 case ADDRESS_SPACE_SHARED:
615 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_shared_yes_64
616 : NVPTX::cvta_to_shared_yes;
618 case ADDRESS_SPACE_CONST:
619 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_const_yes_64
620 : NVPTX::cvta_to_const_yes;
622 case ADDRESS_SPACE_LOCAL:
623 Opc = Subtarget->is64Bit() ? NVPTX::cvta_to_local_yes_64
624 : NVPTX::cvta_to_local_yes;
627 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
// Selects a scalar load (LoadSDNode) as one of the NVPTX::LD_* machine
// instructions. The opcode family is picked by the first address form that
// matches, tried in this order: direct address (_avar), symbol+immediate
// (_asi), register+immediate (_ari), and finally plain register (_areg). The
// _ari and _areg forms have separate _64 opcodes for 64-bit subtargets.
// Every form passes the same leading immediates: volatile flag, address
// space code, vector type, source type and source type width.
631 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
633 LoadSDNode *LD = cast<LoadSDNode>(N);
634 EVT LoadedVT = LD->getMemoryVT();
635 SDNode *NVPTXLD = nullptr;
637 // do not support pre/post inc/dec
641 if (!LoadedVT.isSimple())
644 // Address Space Setting
645 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
648 // - .volatile is only available for .global and .shared
// NOTE(review): the condition also lets GENERIC through; presumably the
// volatile flag is dropped for every other address space - confirm.
649 bool isVolatile = LD->isVolatile();
650 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
651 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
652 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector type setting: Scalar unless the memory type is a vector, in which
// case the element count decides between V2 and V4.
656 MVT SimpleVT = LoadedVT.getSimpleVT();
657 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
658 if (SimpleVT.isVector()) {
659 unsigned num = SimpleVT.getVectorNumElements();
661 vecType = NVPTX::PTXLdStInstCode::V2;
663 vecType = NVPTX::PTXLdStInstCode::V4;
668 // Type Setting: fromType + fromTypeWidth
670 // Sign : ISD::SEXTLOAD
671 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
673 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
674 MVT ScalarVT = SimpleVT.getScalarType();
675 // Read at least 8 bits (predicates are stored as 8-bit values)
676 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
677 unsigned int fromType;
678 if ((LD->getExtensionType() == ISD::SEXTLOAD))
679 fromType = NVPTX::PTXLdStInstCode::Signed;
680 else if (ScalarVT.isFloatingPoint())
681 fromType = NVPTX::PTXLdStInstCode::Float;
683 fromType = NVPTX::PTXLdStInstCode::Unsigned;
685 // Create the machine instruction DAG
686 SDValue Chain = N->getOperand(0);
687 SDValue N1 = N->getOperand(1);
689 SDValue Offset, Base;
// The opcode is chosen by the value type of the load's first result.
691 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Form 1: direct address.
693 if (SelectDirectAddr(N1, Addr)) {
696 Opcode = NVPTX::LD_i8_avar;
699 Opcode = NVPTX::LD_i16_avar;
702 Opcode = NVPTX::LD_i32_avar;
705 Opcode = NVPTX::LD_i64_avar;
708 Opcode = NVPTX::LD_f32_avar;
711 Opcode = NVPTX::LD_f64_avar;
716 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
717 getI32Imm(vecType), getI32Imm(fromType),
718 getI32Imm(fromTypeWidth), Addr, Chain };
719 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 2: symbol + immediate offset.
720 } else if (Subtarget->is64Bit()
721 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
722 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
725 Opcode = NVPTX::LD_i8_asi;
728 Opcode = NVPTX::LD_i16_asi;
731 Opcode = NVPTX::LD_i32_asi;
734 Opcode = NVPTX::LD_i64_asi;
737 Opcode = NVPTX::LD_f32_asi;
740 Opcode = NVPTX::LD_f64_asi;
745 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
746 getI32Imm(vecType), getI32Imm(fromType),
747 getI32Imm(fromTypeWidth), Base, Offset, Chain };
748 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 3: register + immediate offset.
749 } else if (Subtarget->is64Bit()
750 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
751 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
752 if (Subtarget->is64Bit()) {
755 Opcode = NVPTX::LD_i8_ari_64;
758 Opcode = NVPTX::LD_i16_ari_64;
761 Opcode = NVPTX::LD_i32_ari_64;
764 Opcode = NVPTX::LD_i64_ari_64;
767 Opcode = NVPTX::LD_f32_ari_64;
770 Opcode = NVPTX::LD_f64_ari_64;
778 Opcode = NVPTX::LD_i8_ari;
781 Opcode = NVPTX::LD_i16_ari;
784 Opcode = NVPTX::LD_i32_ari;
787 Opcode = NVPTX::LD_i64_ari;
790 Opcode = NVPTX::LD_f32_ari;
793 Opcode = NVPTX::LD_f64_ari;
799 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
800 getI32Imm(vecType), getI32Imm(fromType),
801 getI32Imm(fromTypeWidth), Base, Offset, Chain };
802 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Form 4: the address operand is used directly as a register.
804 if (Subtarget->is64Bit()) {
807 Opcode = NVPTX::LD_i8_areg_64;
810 Opcode = NVPTX::LD_i16_areg_64;
813 Opcode = NVPTX::LD_i32_areg_64;
816 Opcode = NVPTX::LD_i64_areg_64;
819 Opcode = NVPTX::LD_f32_areg_64;
822 Opcode = NVPTX::LD_f64_areg_64;
830 Opcode = NVPTX::LD_i8_areg;
833 Opcode = NVPTX::LD_i16_areg;
836 Opcode = NVPTX::LD_i32_areg;
839 Opcode = NVPTX::LD_i64_areg;
842 Opcode = NVPTX::LD_f32_areg;
845 Opcode = NVPTX::LD_f64_areg;
851 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
852 getI32Imm(vecType), getI32Imm(fromType),
853 getI32Imm(fromTypeWidth), N1, Chain };
854 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Carry the original node's memory operand over to the new machine node.
858 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
859 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
860 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Selects an NVPTXISD::LoadV2 / LoadV4 node as one of the NVPTX::LDV_*
// machine instructions. The address forms are tried in the same order as for
// scalar loads: direct address (_avar), symbol+immediate (_asi),
// register+immediate (_ari), then plain register (_areg); _ari and _areg have
// separate _64 opcodes for 64-bit subtargets. Within each form the opcode
// depends on the node opcode (V2 vs V4) and on the element value type. The
// V4 opcode lists contain i8/i16/i32/f32 only, while the V2 lists also
// contain i64 and f64.
866 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
868 SDValue Chain = N->getOperand(0);
869 SDValue Op1 = N->getOperand(1);
870 SDValue Addr, Offset, Base;
874 MemSDNode *MemSD = cast<MemSDNode>(N);
875 EVT LoadedVT = MemSD->getMemoryVT();
877 if (!LoadedVT.isSimple())
880 // Address Space Setting
881 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
884 // - .volatile is only available for .global and .shared
// NOTE(review): the condition also lets GENERIC through; presumably the
// volatile flag is dropped for every other address space - confirm.
885 bool IsVolatile = MemSD->isVolatile();
886 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
887 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
888 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
892 MVT SimpleVT = LoadedVT.getSimpleVT();
894 // Type Setting: fromType + fromTypeWidth
896 // Sign : ISD::SEXTLOAD
897 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
899 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
900 MVT ScalarVT = SimpleVT.getScalarType();
901 // Read at least 8 bits (predicates are stored as 8-bit values)
902 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
903 unsigned int FromType;
904 // The last operand holds the original LoadSDNode::getExtensionType() value
905 unsigned ExtensionType = cast<ConstantSDNode>(
906 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
907 if (ExtensionType == ISD::SEXTLOAD)
908 FromType = NVPTX::PTXLdStInstCode::Signed;
909 else if (ScalarVT.isFloatingPoint())
910 FromType = NVPTX::PTXLdStInstCode::Float;
912 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// Vector type setting, derived from the node opcode.
916 switch (N->getOpcode()) {
917 case NVPTXISD::LoadV2:
918 VecType = NVPTX::PTXLdStInstCode::V2;
920 case NVPTXISD::LoadV4:
921 VecType = NVPTX::PTXLdStInstCode::V4;
// The element type used for opcode selection is the node's first result
// type.
927 EVT EltVT = N->getValueType(0);
// Form 1: direct address.
929 if (SelectDirectAddr(Op1, Addr)) {
930 switch (N->getOpcode()) {
933 case NVPTXISD::LoadV2:
934 switch (EltVT.getSimpleVT().SimpleTy) {
938 Opcode = NVPTX::LDV_i8_v2_avar;
941 Opcode = NVPTX::LDV_i16_v2_avar;
944 Opcode = NVPTX::LDV_i32_v2_avar;
947 Opcode = NVPTX::LDV_i64_v2_avar;
950 Opcode = NVPTX::LDV_f32_v2_avar;
953 Opcode = NVPTX::LDV_f64_v2_avar;
957 case NVPTXISD::LoadV4:
958 switch (EltVT.getSimpleVT().SimpleTy) {
962 Opcode = NVPTX::LDV_i8_v4_avar;
965 Opcode = NVPTX::LDV_i16_v4_avar;
968 Opcode = NVPTX::LDV_i32_v4_avar;
971 Opcode = NVPTX::LDV_f32_v4_avar;
977 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
978 getI32Imm(VecType), getI32Imm(FromType),
979 getI32Imm(FromTypeWidth), Addr, Chain };
980 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: symbol + immediate offset.
981 } else if (Subtarget->is64Bit()
982 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
983 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
984 switch (N->getOpcode()) {
987 case NVPTXISD::LoadV2:
988 switch (EltVT.getSimpleVT().SimpleTy) {
992 Opcode = NVPTX::LDV_i8_v2_asi;
995 Opcode = NVPTX::LDV_i16_v2_asi;
998 Opcode = NVPTX::LDV_i32_v2_asi;
1001 Opcode = NVPTX::LDV_i64_v2_asi;
1004 Opcode = NVPTX::LDV_f32_v2_asi;
1007 Opcode = NVPTX::LDV_f64_v2_asi;
1011 case NVPTXISD::LoadV4:
1012 switch (EltVT.getSimpleVT().SimpleTy) {
1016 Opcode = NVPTX::LDV_i8_v4_asi;
1019 Opcode = NVPTX::LDV_i16_v4_asi;
1022 Opcode = NVPTX::LDV_i32_v4_asi;
1025 Opcode = NVPTX::LDV_f32_v4_asi;
1031 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1032 getI32Imm(VecType), getI32Imm(FromType),
1033 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1034 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3: register + immediate offset.
1035 } else if (Subtarget->is64Bit()
1036 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1037 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1038 if (Subtarget->is64Bit()) {
1039 switch (N->getOpcode()) {
1042 case NVPTXISD::LoadV2:
1043 switch (EltVT.getSimpleVT().SimpleTy) {
1047 Opcode = NVPTX::LDV_i8_v2_ari_64;
1050 Opcode = NVPTX::LDV_i16_v2_ari_64;
1053 Opcode = NVPTX::LDV_i32_v2_ari_64;
1056 Opcode = NVPTX::LDV_i64_v2_ari_64;
1059 Opcode = NVPTX::LDV_f32_v2_ari_64;
1062 Opcode = NVPTX::LDV_f64_v2_ari_64;
1066 case NVPTXISD::LoadV4:
1067 switch (EltVT.getSimpleVT().SimpleTy) {
1071 Opcode = NVPTX::LDV_i8_v4_ari_64;
1074 Opcode = NVPTX::LDV_i16_v4_ari_64;
1077 Opcode = NVPTX::LDV_i32_v4_ari_64;
1080 Opcode = NVPTX::LDV_f32_v4_ari_64;
1086 switch (N->getOpcode()) {
1089 case NVPTXISD::LoadV2:
1090 switch (EltVT.getSimpleVT().SimpleTy) {
1094 Opcode = NVPTX::LDV_i8_v2_ari;
1097 Opcode = NVPTX::LDV_i16_v2_ari;
1100 Opcode = NVPTX::LDV_i32_v2_ari;
1103 Opcode = NVPTX::LDV_i64_v2_ari;
1106 Opcode = NVPTX::LDV_f32_v2_ari;
1109 Opcode = NVPTX::LDV_f64_v2_ari;
1113 case NVPTXISD::LoadV4:
1114 switch (EltVT.getSimpleVT().SimpleTy) {
1118 Opcode = NVPTX::LDV_i8_v4_ari;
1121 Opcode = NVPTX::LDV_i16_v4_ari;
1124 Opcode = NVPTX::LDV_i32_v4_ari;
1127 Opcode = NVPTX::LDV_f32_v4_ari;
1134 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1135 getI32Imm(VecType), getI32Imm(FromType),
1136 getI32Imm(FromTypeWidth), Base, Offset, Chain };
1138 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 4: the address operand is used directly as a register.
1140 if (Subtarget->is64Bit()) {
1141 switch (N->getOpcode()) {
1144 case NVPTXISD::LoadV2:
1145 switch (EltVT.getSimpleVT().SimpleTy) {
1149 Opcode = NVPTX::LDV_i8_v2_areg_64;
1152 Opcode = NVPTX::LDV_i16_v2_areg_64;
1155 Opcode = NVPTX::LDV_i32_v2_areg_64;
1158 Opcode = NVPTX::LDV_i64_v2_areg_64;
1161 Opcode = NVPTX::LDV_f32_v2_areg_64;
1164 Opcode = NVPTX::LDV_f64_v2_areg_64;
1168 case NVPTXISD::LoadV4:
1169 switch (EltVT.getSimpleVT().SimpleTy) {
1173 Opcode = NVPTX::LDV_i8_v4_areg_64;
1176 Opcode = NVPTX::LDV_i16_v4_areg_64;
1179 Opcode = NVPTX::LDV_i32_v4_areg_64;
1182 Opcode = NVPTX::LDV_f32_v4_areg_64;
1188 switch (N->getOpcode()) {
1191 case NVPTXISD::LoadV2:
1192 switch (EltVT.getSimpleVT().SimpleTy) {
1196 Opcode = NVPTX::LDV_i8_v2_areg;
1199 Opcode = NVPTX::LDV_i16_v2_areg;
1202 Opcode = NVPTX::LDV_i32_v2_areg;
1205 Opcode = NVPTX::LDV_i64_v2_areg;
1208 Opcode = NVPTX::LDV_f32_v2_areg;
1211 Opcode = NVPTX::LDV_f64_v2_areg;
1215 case NVPTXISD::LoadV4:
1216 switch (EltVT.getSimpleVT().SimpleTy) {
1220 Opcode = NVPTX::LDV_i8_v4_areg;
1223 Opcode = NVPTX::LDV_i16_v4_areg;
1226 Opcode = NVPTX::LDV_i32_v4_areg;
1229 Opcode = NVPTX::LDV_f32_v4_areg;
1236 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
1237 getI32Imm(VecType), getI32Imm(FromType),
1238 getI32Imm(FromTypeWidth), Op1, Chain };
1239 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original node's memory operand over to the new machine node.
1242 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1243 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1244 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLDGLDU - Select a machine node for an LDG/LDU load.
///
/// N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_{f,i,p} / nvvm_ldu_global_{f,i,p} intrinsics, or one of the
/// NVPTXISD::LDGV2, LDUV2, LDGV4, LDUV4 vector nodes. The opcode is chosen
/// from the node kind, the element type of the memory VT, the addressing form
/// matched for the address operand (SelectDirectAddr, SelectADDRri[64], or
/// neither) and Subtarget->is64Bit(). The new node produces N's VT list and
/// receives N's memory operand.
1249 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1251 SDValue Chain = N->getOperand(0);
1256 // If this is an LDG/LDU intrinsic, the address is the third operand. If it's an
1257 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1258 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1259 Op1 = N->getOperand(2);
1260 Mem = cast<MemIntrinsicSDNode>(N);
// Operand 1 of an intrinsic node holds the intrinsic ID as a constant.
1261 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1265 case Intrinsic::nvvm_ldg_global_f:
1266 case Intrinsic::nvvm_ldg_global_i:
1267 case Intrinsic::nvvm_ldg_global_p:
1270 case Intrinsic::nvvm_ldu_global_f:
1271 case Intrinsic::nvvm_ldu_global_i:
1272 case Intrinsic::nvvm_ldu_global_p:
1277 Op1 = N->getOperand(1);
1278 Mem = cast<MemSDNode>(N);
1284 SDValue Base, Offset, Addr;
// Opcodes are keyed on the element type: for a vector memory VT use the
// type of one element.
1286 EVT EltVT = Mem->getMemoryVT();
1287 if (EltVT.isVector()) {
1288 EltVT = EltVT.getVectorElementType();
// Form 1: the address is matched by SelectDirectAddr -> "*avar" opcodes,
// operands are { Addr, Chain }.
1291 if (SelectDirectAddr(Op1, Addr)) {
1292 switch (N->getOpcode()) {
1295 case ISD::INTRINSIC_W_CHAIN:
// Scalar LDG opcodes.
// NOTE(review): presumably this switch and the LDU switch that follows are
// selected by whether IID is an ldg or an ldu intrinsic — confirm.
1297 switch (EltVT.getSimpleVT().SimpleTy) {
1301 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1304 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1307 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1310 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1313 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1316 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
// Scalar LDU opcodes.
1320 switch (EltVT.getSimpleVT().SimpleTy) {
1324 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1327 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1330 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1333 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1336 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1339 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1344 case NVPTXISD::LDGV2:
1345 switch (EltVT.getSimpleVT().SimpleTy) {
1349 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1352 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1355 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1358 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1361 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1364 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1368 case NVPTXISD::LDUV2:
1369 switch (EltVT.getSimpleVT().SimpleTy) {
1373 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1376 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1379 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1382 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1385 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1388 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
// The four-element forms list only i8, i16, i32 and f32 opcodes.
1392 case NVPTXISD::LDGV4:
1393 switch (EltVT.getSimpleVT().SimpleTy) {
1397 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1400 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1403 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1406 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1410 case NVPTXISD::LDUV4:
1411 switch (EltVT.getSimpleVT().SimpleTy) {
1415 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1418 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1421 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1424 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1430 SDValue Ops[] = { Addr, Chain };
1431 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 2: the address is matched by SelectADDRri64 (64-bit subtarget) or
// SelectADDRri (otherwise), which supply Base and Offset -> "*ari64" /
// "*ari" / "*ari32" opcodes, operands are { Base, Offset, Chain }.
1432 } else if (Subtarget->is64Bit()
1433 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1434 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1435 if (Subtarget->is64Bit()) {
1436 switch (N->getOpcode()) {
1439 case ISD::INTRINSIC_W_CHAIN:
// Scalar LDG opcodes, 64-bit addressing.
1441 switch (EltVT.getSimpleVT().SimpleTy) {
1445 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1448 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1451 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1454 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1457 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1460 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
// Scalar LDU opcodes, 64-bit addressing.
1464 switch (EltVT.getSimpleVT().SimpleTy) {
1468 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1471 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1474 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1477 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1480 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1483 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1488 case NVPTXISD::LDGV2:
1489 switch (EltVT.getSimpleVT().SimpleTy) {
1493 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1496 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1499 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1502 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1505 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1508 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1512 case NVPTXISD::LDUV2:
1513 switch (EltVT.getSimpleVT().SimpleTy) {
1517 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1520 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1523 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1526 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1529 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1532 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1536 case NVPTXISD::LDGV4:
1537 switch (EltVT.getSimpleVT().SimpleTy) {
1541 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1544 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1547 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1550 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1554 case NVPTXISD::LDUV4:
1555 switch (EltVT.getSimpleVT().SimpleTy) {
1559 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1562 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1565 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1568 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
// Same selection for the non-64-bit subtarget ("*ari" / "*ari32").
1574 switch (N->getOpcode()) {
1577 case ISD::INTRINSIC_W_CHAIN:
1579 switch (EltVT.getSimpleVT().SimpleTy) {
1583 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1586 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1589 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1592 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1595 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1598 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1602 switch (EltVT.getSimpleVT().SimpleTy) {
1606 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1609 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1612 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1615 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1618 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1621 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1626 case NVPTXISD::LDGV2:
1627 switch (EltVT.getSimpleVT().SimpleTy) {
1631 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1634 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1637 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1640 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1643 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1646 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1650 case NVPTXISD::LDUV2:
1651 switch (EltVT.getSimpleVT().SimpleTy) {
1655 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1658 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1661 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1664 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1667 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1670 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1674 case NVPTXISD::LDGV4:
1675 switch (EltVT.getSimpleVT().SimpleTy) {
1679 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1682 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1685 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1688 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1692 case NVPTXISD::LDUV4:
1693 switch (EltVT.getSimpleVT().SimpleTy) {
1697 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1700 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1703 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1706 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1713 SDValue Ops[] = { Base, Offset, Chain };
1715 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Form 3 (remaining case): the address operand Op1 is passed through
// unchanged -> "*areg64" / "*areg" / "*areg32" opcodes, operands are
// { Op1, Chain }.
1717 if (Subtarget->is64Bit()) {
1718 switch (N->getOpcode()) {
1721 case ISD::INTRINSIC_W_CHAIN:
// Scalar LDG opcodes, 64-bit addressing.
1723 switch (EltVT.getSimpleVT().SimpleTy) {
1727 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1730 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1733 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1736 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1739 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1742 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
// Scalar LDU opcodes, 64-bit addressing.
1746 switch (EltVT.getSimpleVT().SimpleTy) {
1750 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1753 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1756 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1759 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1762 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1765 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1770 case NVPTXISD::LDGV2:
1771 switch (EltVT.getSimpleVT().SimpleTy) {
1775 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1778 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1781 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1784 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1787 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1790 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1794 case NVPTXISD::LDUV2:
1795 switch (EltVT.getSimpleVT().SimpleTy) {
1799 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1802 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1805 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1808 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1811 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1814 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1818 case NVPTXISD::LDGV4:
1819 switch (EltVT.getSimpleVT().SimpleTy) {
1823 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1826 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1829 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1832 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1836 case NVPTXISD::LDUV4:
1837 switch (EltVT.getSimpleVT().SimpleTy) {
1841 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1844 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1847 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1850 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
// Same selection for the non-64-bit subtarget ("*areg" / "*areg32").
1856 switch (N->getOpcode()) {
1859 case ISD::INTRINSIC_W_CHAIN:
1861 switch (EltVT.getSimpleVT().SimpleTy) {
1865 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1868 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1871 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1874 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1877 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1880 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1884 switch (EltVT.getSimpleVT().SimpleTy) {
1888 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1891 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1894 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1897 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1900 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1903 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1908 case NVPTXISD::LDGV2:
1909 switch (EltVT.getSimpleVT().SimpleTy) {
1913 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1916 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1919 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1922 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1925 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1928 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1932 case NVPTXISD::LDUV2:
1933 switch (EltVT.getSimpleVT().SimpleTy) {
1937 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1940 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1943 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1946 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1949 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1952 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1956 case NVPTXISD::LDGV4:
1957 switch (EltVT.getSimpleVT().SimpleTy) {
1961 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1964 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1967 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1970 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1974 case NVPTXISD::LDUV4:
1975 switch (EltVT.getSimpleVT().SimpleTy) {
1979 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1982 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1985 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1988 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1995 SDValue Ops[] = { Op1, Chain };
1996 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the memory operand of the original node over to the machine node.
1999 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2000 MemRefs0[0] = Mem->getMemOperand();
2001 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStore - Select a machine ST_* node for the StoreSDNode N.
///
/// The selected node takes the stored value first, then five i32 immediates
/// (volatile flag, code address space, vector kind, destination type,
/// destination width in bits), then the address operand(s) and the chain.
/// The opcode family is chosen from the addressing form matched for the
/// address operand: SelectDirectAddr ("avar"), SelectADDRsi[64] ("asi"),
/// SelectADDRri[64] ("ari"/"ari_64"), or none of them ("areg"/"areg_64").
/// N's memory operand is attached to the new node.
2006 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2008 StoreSDNode *ST = cast<StoreSDNode>(N);
2009 EVT StoreVT = ST->getMemoryVT();
2010 SDNode *NVPTXST = nullptr;
2012 // do not support pre/post inc/dec
2013 if (ST->isIndexed())
// Only simple memory value types are handled.
2016 if (!StoreVT.isSimple())
2019 // Address Space Setting
2020 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
2023 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lets the GENERIC address space through;
// presumably isVolatile is cleared for every other address space — confirm.
2024 bool isVolatile = ST->isVolatile();
2025 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2026 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2027 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector kind: Scalar unless the stored type is a vector, in which case V2
// or V4 is used (presumably chosen by `num` — confirm).
2031 MVT SimpleVT = StoreVT.getSimpleVT();
2032 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2033 if (SimpleVT.isVector()) {
2034 unsigned num = SimpleVT.getVectorNumElements();
2036 vecType = NVPTX::PTXLdStInstCode::V2;
2038 vecType = NVPTX::PTXLdStInstCode::V4;
2043 // Type Setting: toType + toTypeWidth
2044 // - for integer type, always use 'u'
2046 MVT ScalarVT = SimpleVT.getScalarType();
2047 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2048 unsigned int toType;
2049 if (ScalarVT.isFloatingPoint())
2050 toType = NVPTX::PTXLdStInstCode::Float;
2052 toType = NVPTX::PTXLdStInstCode::Unsigned;
2054 // Create the machine instruction DAG
// N1 is the value being stored (first machine operand below); N2 is the
// address (fed to the address selectors below).
2055 SDValue Chain = N->getOperand(0);
2056 SDValue N1 = N->getOperand(1);
2057 SDValue N2 = N->getOperand(2);
2059 SDValue Offset, Base;
// Value type of the stored operand.
// NOTE(review): presumably the opcode assignments below are cases of a switch
// on SourceVT — confirm.
2061 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
// Form 1: address matched by SelectDirectAddr -> ST_*_avar.
2063 if (SelectDirectAddr(N2, Addr)) {
2066 Opcode = NVPTX::ST_i8_avar;
2069 Opcode = NVPTX::ST_i16_avar;
2072 Opcode = NVPTX::ST_i32_avar;
2075 Opcode = NVPTX::ST_i64_avar;
2078 Opcode = NVPTX::ST_f32_avar;
2081 Opcode = NVPTX::ST_f64_avar;
2086 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2087 getI32Imm(vecType), getI32Imm(toType),
2088 getI32Imm(toTypeWidth), Addr, Chain };
2089 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Form 2: address matched by SelectADDRsi64 / SelectADDRsi -> ST_*_asi
// (the same opcodes are used for both subtarget widths).
2090 } else if (Subtarget->is64Bit()
2091 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2092 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2095 Opcode = NVPTX::ST_i8_asi;
2098 Opcode = NVPTX::ST_i16_asi;
2101 Opcode = NVPTX::ST_i32_asi;
2104 Opcode = NVPTX::ST_i64_asi;
2107 Opcode = NVPTX::ST_f32_asi;
2110 Opcode = NVPTX::ST_f64_asi;
2115 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2116 getI32Imm(vecType), getI32Imm(toType),
2117 getI32Imm(toTypeWidth), Base, Offset, Chain };
2118 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Form 3: address matched by SelectADDRri64 / SelectADDRri -> ST_*_ari_64
// on a 64-bit subtarget, ST_*_ari otherwise.
2119 } else if (Subtarget->is64Bit()
2120 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2121 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2122 if (Subtarget->is64Bit()) {
2125 Opcode = NVPTX::ST_i8_ari_64;
2128 Opcode = NVPTX::ST_i16_ari_64;
2131 Opcode = NVPTX::ST_i32_ari_64;
2134 Opcode = NVPTX::ST_i64_ari_64;
2137 Opcode = NVPTX::ST_f32_ari_64;
2140 Opcode = NVPTX::ST_f64_ari_64;
2148 Opcode = NVPTX::ST_i8_ari;
2151 Opcode = NVPTX::ST_i16_ari;
2154 Opcode = NVPTX::ST_i32_ari;
2157 Opcode = NVPTX::ST_i64_ari;
2160 Opcode = NVPTX::ST_f32_ari;
2163 Opcode = NVPTX::ST_f64_ari;
2169 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2170 getI32Imm(vecType), getI32Imm(toType),
2171 getI32Imm(toTypeWidth), Base, Offset, Chain };
2172 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Form 4 (remaining case): the address operand N2 is passed through
// unchanged -> ST_*_areg_64 / ST_*_areg.
2174 if (Subtarget->is64Bit()) {
2177 Opcode = NVPTX::ST_i8_areg_64;
2180 Opcode = NVPTX::ST_i16_areg_64;
2183 Opcode = NVPTX::ST_i32_areg_64;
2186 Opcode = NVPTX::ST_i64_areg_64;
2189 Opcode = NVPTX::ST_f32_areg_64;
2192 Opcode = NVPTX::ST_f64_areg_64;
2200 Opcode = NVPTX::ST_i8_areg;
2203 Opcode = NVPTX::ST_i16_areg;
2206 Opcode = NVPTX::ST_i32_areg;
2209 Opcode = NVPTX::ST_i64_areg;
2212 Opcode = NVPTX::ST_f32_areg;
2215 Opcode = NVPTX::ST_f64_areg;
2221 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
2222 getI32Imm(vecType), getI32Imm(toType),
2223 getI32Imm(toTypeWidth), N2, Chain };
2224 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the memory operand of the original store over to the machine node.
2228 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2229 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2230 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreVector - Select a machine STV_* node for an NVPTXISD::StoreV2
/// or NVPTXISD::StoreV4 node.
///
/// The operand list is built in StOps: the 2 or 4 element values, five i32
/// immediates (volatile flag, code address space, vector kind, destination
/// type, destination width in bits), the address operand(s) for the matched
/// addressing form, and finally the chain. Storing to the CONSTANT address
/// space is a fatal error. N's memory operand is attached to the new node.
2236 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2237 SDValue Chain = N->getOperand(0);
2238 SDValue Op1 = N->getOperand(1);
2239 SDValue Addr, Offset, Base;
// Opcodes are keyed on the value type of the first stored element (Op1);
// the type/width immediates come from the memory VT instead.
2243 EVT EltVT = Op1.getValueType();
2244 MemSDNode *MemSD = cast<MemSDNode>(N);
2245 EVT StoreVT = MemSD->getMemoryVT();
2247 // Address Space Setting
2248 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2250 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2251 report_fatal_error("Cannot store to pointer that points to constant "
2256 // - .volatile is only available for .global and .shared
// NOTE(review): the check below also lets the GENERIC address space through;
// presumably IsVolatile is cleared for every other address space — confirm.
2257 bool IsVolatile = MemSD->isVolatile();
2258 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2259 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2260 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2263 // Type Setting: toType + toTypeWidth
2264 // - for integer type, always use 'u'
2265 assert(StoreVT.isSimple() && "Store value is not simple");
2266 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2267 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2269 if (ScalarVT.isFloatingPoint())
2270 ToType = NVPTX::PTXLdStInstCode::Float;
2272 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2274 SmallVector<SDValue, 12> StOps;
// Collect the element values and locate the address operand (N2):
//   StoreV2: operands 1-2 are values, operand 3 is the address.
//   StoreV4: operands 1-4 are values, operand 5 is the address.
2278 switch (N->getOpcode()) {
2279 case NVPTXISD::StoreV2:
2280 VecType = NVPTX::PTXLdStInstCode::V2;
2281 StOps.push_back(N->getOperand(1));
2282 StOps.push_back(N->getOperand(2));
2283 N2 = N->getOperand(3);
2285 case NVPTXISD::StoreV4:
2286 VecType = NVPTX::PTXLdStInstCode::V4;
2287 StOps.push_back(N->getOperand(1));
2288 StOps.push_back(N->getOperand(2));
2289 StOps.push_back(N->getOperand(3));
2290 StOps.push_back(N->getOperand(4));
2291 N2 = N->getOperand(5);
// Immediate operands, in the order the STV_* opcodes are given them here.
2297 StOps.push_back(getI32Imm(IsVolatile));
2298 StOps.push_back(getI32Imm(CodeAddrSpace));
2299 StOps.push_back(getI32Imm(VecType));
2300 StOps.push_back(getI32Imm(ToType));
2301 StOps.push_back(getI32Imm(ToTypeWidth));
// Form 1: address matched by SelectDirectAddr -> STV_*_avar, append Addr.
2303 if (SelectDirectAddr(N2, Addr)) {
2304 switch (N->getOpcode()) {
2307 case NVPTXISD::StoreV2:
2308 switch (EltVT.getSimpleVT().SimpleTy) {
2312 Opcode = NVPTX::STV_i8_v2_avar;
2315 Opcode = NVPTX::STV_i16_v2_avar;
2318 Opcode = NVPTX::STV_i32_v2_avar;
2321 Opcode = NVPTX::STV_i64_v2_avar;
2324 Opcode = NVPTX::STV_f32_v2_avar;
2327 Opcode = NVPTX::STV_f64_v2_avar;
// The four-element forms list only i8, i16, i32 and f32 opcodes.
2331 case NVPTXISD::StoreV4:
2332 switch (EltVT.getSimpleVT().SimpleTy) {
2336 Opcode = NVPTX::STV_i8_v4_avar;
2339 Opcode = NVPTX::STV_i16_v4_avar;
2342 Opcode = NVPTX::STV_i32_v4_avar;
2345 Opcode = NVPTX::STV_f32_v4_avar;
2350 StOps.push_back(Addr);
// Form 2: address matched by SelectADDRsi64 / SelectADDRsi -> STV_*_asi,
// append Base and Offset.
2351 } else if (Subtarget->is64Bit()
2352 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2353 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2354 switch (N->getOpcode()) {
2357 case NVPTXISD::StoreV2:
2358 switch (EltVT.getSimpleVT().SimpleTy) {
2362 Opcode = NVPTX::STV_i8_v2_asi;
2365 Opcode = NVPTX::STV_i16_v2_asi;
2368 Opcode = NVPTX::STV_i32_v2_asi;
2371 Opcode = NVPTX::STV_i64_v2_asi;
2374 Opcode = NVPTX::STV_f32_v2_asi;
2377 Opcode = NVPTX::STV_f64_v2_asi;
2381 case NVPTXISD::StoreV4:
2382 switch (EltVT.getSimpleVT().SimpleTy) {
2386 Opcode = NVPTX::STV_i8_v4_asi;
2389 Opcode = NVPTX::STV_i16_v4_asi;
2392 Opcode = NVPTX::STV_i32_v4_asi;
2395 Opcode = NVPTX::STV_f32_v4_asi;
2400 StOps.push_back(Base);
2401 StOps.push_back(Offset);
// Form 3: address matched by SelectADDRri64 / SelectADDRri ->
// STV_*_ari_64 on a 64-bit subtarget, STV_*_ari otherwise; append Base and
// Offset.
2402 } else if (Subtarget->is64Bit()
2403 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2404 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2405 if (Subtarget->is64Bit()) {
2406 switch (N->getOpcode()) {
2409 case NVPTXISD::StoreV2:
2410 switch (EltVT.getSimpleVT().SimpleTy) {
2414 Opcode = NVPTX::STV_i8_v2_ari_64;
2417 Opcode = NVPTX::STV_i16_v2_ari_64;
2420 Opcode = NVPTX::STV_i32_v2_ari_64;
2423 Opcode = NVPTX::STV_i64_v2_ari_64;
2426 Opcode = NVPTX::STV_f32_v2_ari_64;
2429 Opcode = NVPTX::STV_f64_v2_ari_64;
2433 case NVPTXISD::StoreV4:
2434 switch (EltVT.getSimpleVT().SimpleTy) {
2438 Opcode = NVPTX::STV_i8_v4_ari_64;
2441 Opcode = NVPTX::STV_i16_v4_ari_64;
2444 Opcode = NVPTX::STV_i32_v4_ari_64;
2447 Opcode = NVPTX::STV_f32_v4_ari_64;
2453 switch (N->getOpcode()) {
2456 case NVPTXISD::StoreV2:
2457 switch (EltVT.getSimpleVT().SimpleTy) {
2461 Opcode = NVPTX::STV_i8_v2_ari;
2464 Opcode = NVPTX::STV_i16_v2_ari;
2467 Opcode = NVPTX::STV_i32_v2_ari;
2470 Opcode = NVPTX::STV_i64_v2_ari;
2473 Opcode = NVPTX::STV_f32_v2_ari;
2476 Opcode = NVPTX::STV_f64_v2_ari;
2480 case NVPTXISD::StoreV4:
2481 switch (EltVT.getSimpleVT().SimpleTy) {
2485 Opcode = NVPTX::STV_i8_v4_ari;
2488 Opcode = NVPTX::STV_i16_v4_ari;
2491 Opcode = NVPTX::STV_i32_v4_ari;
2494 Opcode = NVPTX::STV_f32_v4_ari;
2500 StOps.push_back(Base);
2501 StOps.push_back(Offset);
// Form 4 (remaining case): the address operand N2 is passed through
// unchanged -> STV_*_areg_64 / STV_*_areg.
2503 if (Subtarget->is64Bit()) {
2504 switch (N->getOpcode()) {
2507 case NVPTXISD::StoreV2:
2508 switch (EltVT.getSimpleVT().SimpleTy) {
2512 Opcode = NVPTX::STV_i8_v2_areg_64;
2515 Opcode = NVPTX::STV_i16_v2_areg_64;
2518 Opcode = NVPTX::STV_i32_v2_areg_64;
2521 Opcode = NVPTX::STV_i64_v2_areg_64;
2524 Opcode = NVPTX::STV_f32_v2_areg_64;
2527 Opcode = NVPTX::STV_f64_v2_areg_64;
2531 case NVPTXISD::StoreV4:
2532 switch (EltVT.getSimpleVT().SimpleTy) {
2536 Opcode = NVPTX::STV_i8_v4_areg_64;
2539 Opcode = NVPTX::STV_i16_v4_areg_64;
2542 Opcode = NVPTX::STV_i32_v4_areg_64;
2545 Opcode = NVPTX::STV_f32_v4_areg_64;
2551 switch (N->getOpcode()) {
2554 case NVPTXISD::StoreV2:
2555 switch (EltVT.getSimpleVT().SimpleTy) {
2559 Opcode = NVPTX::STV_i8_v2_areg;
2562 Opcode = NVPTX::STV_i16_v2_areg;
2565 Opcode = NVPTX::STV_i32_v2_areg;
2568 Opcode = NVPTX::STV_i64_v2_areg;
2571 Opcode = NVPTX::STV_f32_v2_areg;
2574 Opcode = NVPTX::STV_f64_v2_areg;
2578 case NVPTXISD::StoreV4:
2579 switch (EltVT.getSimpleVT().SimpleTy) {
2583 Opcode = NVPTX::STV_i8_v4_areg;
2586 Opcode = NVPTX::STV_i16_v4_areg;
2589 Opcode = NVPTX::STV_i32_v4_areg;
2592 Opcode = NVPTX::STV_f32_v4_areg;
2598 StOps.push_back(N2);
// The chain is always the last operand.
2601 StOps.push_back(Chain);
2603 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the memory operand of the original node over to the machine node.
2605 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2606 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2607 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectLoadParam - Select a LoadParamMem* machine node for an
/// NVPTXISD::LoadParam, LoadParamV2 or LoadParamV4 node.
///
/// Reads the chain (operand 0), a constant offset (operand 2) and the glue
/// value (operand 3) from Node. The opcode is chosen from the node kind and
/// the memory VT. The machine node's operands are the offset as an i32 target
/// constant, the chain and the glue; its results are one, two or four values
/// of Node's first result type followed by MVT::Other and MVT::Glue.
2612 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2613 SDValue Chain = Node->getOperand(0);
2614 SDValue Offset = Node->getOperand(2);
2615 SDValue Flag = Node->getOperand(3);
2617 MemSDNode *Mem = cast<MemSDNode>(Node);
// NOTE(review): presumably this switch records the element count (VecSize,
// used further down) for each node kind — confirm.
2620 switch (Node->getOpcode()) {
2623 case NVPTXISD::LoadParam:
2626 case NVPTXISD::LoadParamV2:
2629 case NVPTXISD::LoadParamV4:
// EltVT is the result type of each loaded value; MemVT drives opcode choice.
2634 EVT EltVT = Node->getValueType(0);
2635 EVT MemVT = Mem->getMemoryVT();
// Scalar opcodes.
// NOTE(review): in each group below the I8 opcode is assigned twice in a row,
// presumably for the i1 and i8 memory types — confirm.
2643 switch (MemVT.getSimpleVT().SimpleTy) {
2647 Opc = NVPTX::LoadParamMemI8;
2650 Opc = NVPTX::LoadParamMemI8;
2653 Opc = NVPTX::LoadParamMemI16;
2656 Opc = NVPTX::LoadParamMemI32;
2659 Opc = NVPTX::LoadParamMemI64;
2662 Opc = NVPTX::LoadParamMemF32;
2665 Opc = NVPTX::LoadParamMemF64;
// Two-element opcodes.
2670 switch (MemVT.getSimpleVT().SimpleTy) {
2674 Opc = NVPTX::LoadParamMemV2I8;
2677 Opc = NVPTX::LoadParamMemV2I8;
2680 Opc = NVPTX::LoadParamMemV2I16;
2683 Opc = NVPTX::LoadParamMemV2I32;
2686 Opc = NVPTX::LoadParamMemV2I64;
2689 Opc = NVPTX::LoadParamMemV2F32;
2692 Opc = NVPTX::LoadParamMemV2F64;
// Four-element opcodes (only I8, I16, I32 and F32 are listed).
2697 switch (MemVT.getSimpleVT().SimpleTy) {
2701 Opc = NVPTX::LoadParamMemV4I8;
2704 Opc = NVPTX::LoadParamMemV4I8;
2707 Opc = NVPTX::LoadParamMemV4I16;
2710 Opc = NVPTX::LoadParamMemV4I32;
2713 Opc = NVPTX::LoadParamMemV4F32;
// Result list: one EltVT per loaded element, then the chain and glue results.
2721 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2722 } else if (VecSize == 2) {
2723 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2725 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2726 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a constant; it is re-emitted as an i32 target
// constant.
2729 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2731 SmallVector<SDValue, 2> Ops;
2732 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2733 Ops.push_back(Chain);
2734 Ops.push_back(Flag);
2737 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
/// SelectStoreRetval - Select a StoreRetval* machine node for an
/// NVPTXISD::StoreRetval, StoreRetvalV2 or StoreRetvalV4 node.
///
/// Operand 0 of N is the chain and operand 1 a constant offset; the values to
/// store start at operand 2. The machine node's operands are the NumElts
/// values, the offset as an i32 target constant, and the chain. The opcode is
/// chosen from the node kind and the memory VT, and N's memory operand is
/// attached to the new node.
2741 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2743 SDValue Chain = N->getOperand(0);
2744 SDValue Offset = N->getOperand(1);
2745 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2746 MemSDNode *Mem = cast<MemSDNode>(N);
2748 // How many elements do we have?
2749 unsigned NumElts = 1;
2750 switch (N->getOpcode()) {
2753 case NVPTXISD::StoreRetval:
2756 case NVPTXISD::StoreRetvalV2:
2759 case NVPTXISD::StoreRetvalV4:
2764 // Build vector of operands
// Order: stored values (N's operands 2 .. NumElts + 1), offset, chain.
2765 SmallVector<SDValue, 6> Ops;
2766 for (unsigned i = 0; i < NumElts; ++i)
2767 Ops.push_back(N->getOperand(i + 2));
2768 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2769 Ops.push_back(Chain);
2771 // Determine target opcode
2772 // If we have an i1, use an 8-bit store. The lowering code in
2773 // NVPTXISelLowering will have already emitted an upcast.
2774 unsigned Opcode = 0;
// Scalar opcodes (the I8 opcode appears twice, matching the i1 note above).
2779 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2783 Opcode = NVPTX::StoreRetvalI8;
2786 Opcode = NVPTX::StoreRetvalI8;
2789 Opcode = NVPTX::StoreRetvalI16;
2792 Opcode = NVPTX::StoreRetvalI32;
2795 Opcode = NVPTX::StoreRetvalI64;
2798 Opcode = NVPTX::StoreRetvalF32;
2801 Opcode = NVPTX::StoreRetvalF64;
// Two-element opcodes.
2806 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2810 Opcode = NVPTX::StoreRetvalV2I8;
2813 Opcode = NVPTX::StoreRetvalV2I8;
2816 Opcode = NVPTX::StoreRetvalV2I16;
2819 Opcode = NVPTX::StoreRetvalV2I32;
2822 Opcode = NVPTX::StoreRetvalV2I64;
2825 Opcode = NVPTX::StoreRetvalV2F32;
2828 Opcode = NVPTX::StoreRetvalV2F64;
// Four-element opcodes (only I8, I16, I32 and F32 are listed).
2833 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2837 Opcode = NVPTX::StoreRetvalV4I8;
2840 Opcode = NVPTX::StoreRetvalV4I8;
2843 Opcode = NVPTX::StoreRetvalV4I16;
2846 Opcode = NVPTX::StoreRetvalV4I32;
2849 Opcode = NVPTX::StoreRetvalV4F32;
// The store produces only a chain result (MVT::Other).
2856 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the memory operand of the original node over to the machine node.
2857 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2858 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2859 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// SelectStoreParam - Select a StoreParam* machine node for an
/// NVPTXISD::StoreParam, StoreParamV2, StoreParamV4, StoreParamU32 or
/// StoreParamS32 node.
///
/// Operand 0 of N is the chain, operand 1 a constant parameter index,
/// operand 2 a constant offset, and the last operand the glue value; the
/// values to store start at operand 3. The machine node's operands are the
/// NumElts values, the parameter index and offset as i32 target constants,
/// the chain and the glue. Its results are MVT::Other and MVT::Glue, and N's
/// memory operand is attached to it. For StoreParamU32 / StoreParamS32 the
/// value is first passed through a CVT_u32_u16 / CVT_s32_s16 node and stored
/// with StoreParamI32.
2864 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2866 SDValue Chain = N->getOperand(0);
2867 SDValue Param = N->getOperand(1);
2868 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2869 SDValue Offset = N->getOperand(2);
2870 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2871 MemSDNode *Mem = cast<MemSDNode>(N);
2872 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2874 // How many elements do we have?
2875 unsigned NumElts = 1;
2876 switch (N->getOpcode()) {
2879 case NVPTXISD::StoreParamU32:
2880 case NVPTXISD::StoreParamS32:
2881 case NVPTXISD::StoreParam:
2884 case NVPTXISD::StoreParamV2:
2887 case NVPTXISD::StoreParamV4:
2892 // Build vector of operands
// Order: stored values (N's operands 3 .. NumElts + 2), parameter index,
// offset, chain, glue.
2893 SmallVector<SDValue, 8> Ops;
2894 for (unsigned i = 0; i < NumElts; ++i)
2895 Ops.push_back(N->getOperand(i + 3));
2896 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2897 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2898 Ops.push_back(Chain);
2899 Ops.push_back(Flag);
2901 // Determine target opcode
2902 // If we have an i1, use an 8-bit store. The lowering code in
2903 // NVPTXISelLowering will have already emitted an upcast.
2904 unsigned Opcode = 0;
2905 switch (N->getOpcode()) {
// Scalar opcodes (the I8 opcode appears twice, matching the i1 note above).
2911 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2915 Opcode = NVPTX::StoreParamI8;
2918 Opcode = NVPTX::StoreParamI8;
2921 Opcode = NVPTX::StoreParamI16;
2924 Opcode = NVPTX::StoreParamI32;
2927 Opcode = NVPTX::StoreParamI64;
2930 Opcode = NVPTX::StoreParamF32;
2933 Opcode = NVPTX::StoreParamF64;
// Two-element opcodes.
2938 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2942 Opcode = NVPTX::StoreParamV2I8;
2945 Opcode = NVPTX::StoreParamV2I8;
2948 Opcode = NVPTX::StoreParamV2I16;
2951 Opcode = NVPTX::StoreParamV2I32;
2954 Opcode = NVPTX::StoreParamV2I64;
2957 Opcode = NVPTX::StoreParamV2F32;
2960 Opcode = NVPTX::StoreParamV2F64;
// Four-element opcodes (only I8, I16, I32 and F32 are listed).
2965 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2969 Opcode = NVPTX::StoreParamV4I8;
2972 Opcode = NVPTX::StoreParamV4I8;
2975 Opcode = NVPTX::StoreParamV4I16;
2978 Opcode = NVPTX::StoreParamV4I32;
2981 Opcode = NVPTX::StoreParamV4F32;
2987 // Special case: if we have a sign-extend/zero-extend node, insert the
2988 // conversion instruction first, and use that as the value operand to
2989 // the selected StoreParam node.
// Zero-extending form: CVT_u32_u16 with conversion mode NONE replaces Ops[0].
2990 case NVPTXISD::StoreParamU32: {
2991 Opcode = NVPTX::StoreParamI32;
2992 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2994 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2995 MVT::i32, Ops[0], CvtNone);
2996 Ops[0] = SDValue(Cvt, 0);
// Sign-extending form: CVT_s32_s16 with conversion mode NONE replaces Ops[0].
2999 case NVPTXISD::StoreParamS32: {
3000 Opcode = NVPTX::StoreParamI32;
3001 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
3003 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3004 MVT::i32, Ops[0], CvtNone);
3005 Ops[0] = SDValue(Cvt, 0);
// The store produces a chain and a glue result.
3010 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3012 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the memory operand of the original node over to the machine node.
3013 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3014 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3015 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3020 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3021 SDValue Chain = N->getOperand(0);
3022 SDNode *Ret = nullptr;
3024 SmallVector<SDValue, 8> Ops;
3026 switch (N->getOpcode()) {
3027 default: return nullptr;
3028 case NVPTXISD::Tex1DFloatS32:
3029 Opc = NVPTX::TEX_1D_F32_S32;
3031 case NVPTXISD::Tex1DFloatFloat:
3032 Opc = NVPTX::TEX_1D_F32_F32;
3034 case NVPTXISD::Tex1DFloatFloatLevel:
3035 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3037 case NVPTXISD::Tex1DFloatFloatGrad:
3038 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3040 case NVPTXISD::Tex1DS32S32:
3041 Opc = NVPTX::TEX_1D_S32_S32;
3043 case NVPTXISD::Tex1DS32Float:
3044 Opc = NVPTX::TEX_1D_S32_F32;
3046 case NVPTXISD::Tex1DS32FloatLevel:
3047 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3049 case NVPTXISD::Tex1DS32FloatGrad:
3050 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3052 case NVPTXISD::Tex1DU32S32:
3053 Opc = NVPTX::TEX_1D_U32_S32;
3055 case NVPTXISD::Tex1DU32Float:
3056 Opc = NVPTX::TEX_1D_U32_F32;
3058 case NVPTXISD::Tex1DU32FloatLevel:
3059 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3061 case NVPTXISD::Tex1DU32FloatGrad:
3062 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3064 case NVPTXISD::Tex1DArrayFloatS32:
3065 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3067 case NVPTXISD::Tex1DArrayFloatFloat:
3068 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3070 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3071 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3073 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3074 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3076 case NVPTXISD::Tex1DArrayS32S32:
3077 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3079 case NVPTXISD::Tex1DArrayS32Float:
3080 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3082 case NVPTXISD::Tex1DArrayS32FloatLevel:
3083 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3085 case NVPTXISD::Tex1DArrayS32FloatGrad:
3086 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3088 case NVPTXISD::Tex1DArrayU32S32:
3089 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3091 case NVPTXISD::Tex1DArrayU32Float:
3092 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3094 case NVPTXISD::Tex1DArrayU32FloatLevel:
3095 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3097 case NVPTXISD::Tex1DArrayU32FloatGrad:
3098 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3100 case NVPTXISD::Tex2DFloatS32:
3101 Opc = NVPTX::TEX_2D_F32_S32;
3103 case NVPTXISD::Tex2DFloatFloat:
3104 Opc = NVPTX::TEX_2D_F32_F32;
3106 case NVPTXISD::Tex2DFloatFloatLevel:
3107 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3109 case NVPTXISD::Tex2DFloatFloatGrad:
3110 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3112 case NVPTXISD::Tex2DS32S32:
3113 Opc = NVPTX::TEX_2D_S32_S32;
3115 case NVPTXISD::Tex2DS32Float:
3116 Opc = NVPTX::TEX_2D_S32_F32;
3118 case NVPTXISD::Tex2DS32FloatLevel:
3119 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3121 case NVPTXISD::Tex2DS32FloatGrad:
3122 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3124 case NVPTXISD::Tex2DU32S32:
3125 Opc = NVPTX::TEX_2D_U32_S32;
3127 case NVPTXISD::Tex2DU32Float:
3128 Opc = NVPTX::TEX_2D_U32_F32;
3130 case NVPTXISD::Tex2DU32FloatLevel:
3131 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3133 case NVPTXISD::Tex2DU32FloatGrad:
3134 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3136 case NVPTXISD::Tex2DArrayFloatS32:
3137 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3139 case NVPTXISD::Tex2DArrayFloatFloat:
3140 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3142 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3143 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3145 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3146 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3148 case NVPTXISD::Tex2DArrayS32S32:
3149 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3151 case NVPTXISD::Tex2DArrayS32Float:
3152 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3154 case NVPTXISD::Tex2DArrayS32FloatLevel:
3155 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3157 case NVPTXISD::Tex2DArrayS32FloatGrad:
3158 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3160 case NVPTXISD::Tex2DArrayU32S32:
3161 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3163 case NVPTXISD::Tex2DArrayU32Float:
3164 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3166 case NVPTXISD::Tex2DArrayU32FloatLevel:
3167 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3169 case NVPTXISD::Tex2DArrayU32FloatGrad:
3170 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3172 case NVPTXISD::Tex3DFloatS32:
3173 Opc = NVPTX::TEX_3D_F32_S32;
3175 case NVPTXISD::Tex3DFloatFloat:
3176 Opc = NVPTX::TEX_3D_F32_F32;
3178 case NVPTXISD::Tex3DFloatFloatLevel:
3179 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3181 case NVPTXISD::Tex3DFloatFloatGrad:
3182 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3184 case NVPTXISD::Tex3DS32S32:
3185 Opc = NVPTX::TEX_3D_S32_S32;
3187 case NVPTXISD::Tex3DS32Float:
3188 Opc = NVPTX::TEX_3D_S32_F32;
3190 case NVPTXISD::Tex3DS32FloatLevel:
3191 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3193 case NVPTXISD::Tex3DS32FloatGrad:
3194 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3196 case NVPTXISD::Tex3DU32S32:
3197 Opc = NVPTX::TEX_3D_U32_S32;
3199 case NVPTXISD::Tex3DU32Float:
3200 Opc = NVPTX::TEX_3D_U32_F32;
3202 case NVPTXISD::Tex3DU32FloatLevel:
3203 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3205 case NVPTXISD::Tex3DU32FloatGrad:
3206 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3208 case NVPTXISD::TexCubeFloatFloat:
3209 Opc = NVPTX::TEX_CUBE_F32_F32;
3211 case NVPTXISD::TexCubeFloatFloatLevel:
3212 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3214 case NVPTXISD::TexCubeS32Float:
3215 Opc = NVPTX::TEX_CUBE_S32_F32;
3217 case NVPTXISD::TexCubeS32FloatLevel:
3218 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3220 case NVPTXISD::TexCubeU32Float:
3221 Opc = NVPTX::TEX_CUBE_U32_F32;
3223 case NVPTXISD::TexCubeU32FloatLevel:
3224 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3226 case NVPTXISD::TexCubeArrayFloatFloat:
3227 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3229 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3230 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3232 case NVPTXISD::TexCubeArrayS32Float:
3233 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3235 case NVPTXISD::TexCubeArrayS32FloatLevel:
3236 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3238 case NVPTXISD::TexCubeArrayU32Float:
3239 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3241 case NVPTXISD::TexCubeArrayU32FloatLevel:
3242 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3244 case NVPTXISD::Tld4R2DFloatFloat:
3245 Opc = NVPTX::TLD4_R_2D_F32_F32;
3247 case NVPTXISD::Tld4G2DFloatFloat:
3248 Opc = NVPTX::TLD4_G_2D_F32_F32;
3250 case NVPTXISD::Tld4B2DFloatFloat:
3251 Opc = NVPTX::TLD4_B_2D_F32_F32;
3253 case NVPTXISD::Tld4A2DFloatFloat:
3254 Opc = NVPTX::TLD4_A_2D_F32_F32;
3256 case NVPTXISD::Tld4R2DS64Float:
3257 Opc = NVPTX::TLD4_R_2D_S32_F32;
3259 case NVPTXISD::Tld4G2DS64Float:
3260 Opc = NVPTX::TLD4_G_2D_S32_F32;
3262 case NVPTXISD::Tld4B2DS64Float:
3263 Opc = NVPTX::TLD4_B_2D_S32_F32;
3265 case NVPTXISD::Tld4A2DS64Float:
3266 Opc = NVPTX::TLD4_A_2D_S32_F32;
3268 case NVPTXISD::Tld4R2DU64Float:
3269 Opc = NVPTX::TLD4_R_2D_U32_F32;
3271 case NVPTXISD::Tld4G2DU64Float:
3272 Opc = NVPTX::TLD4_G_2D_U32_F32;
3274 case NVPTXISD::Tld4B2DU64Float:
3275 Opc = NVPTX::TLD4_B_2D_U32_F32;
3277 case NVPTXISD::Tld4A2DU64Float:
3278 Opc = NVPTX::TLD4_A_2D_U32_F32;
3280 case NVPTXISD::TexUnified1DFloatS32:
3281 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3283 case NVPTXISD::TexUnified1DFloatFloat:
3284 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3286 case NVPTXISD::TexUnified1DFloatFloatLevel:
3287 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3289 case NVPTXISD::TexUnified1DFloatFloatGrad:
3290 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3292 case NVPTXISD::TexUnified1DS32S32:
3293 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3295 case NVPTXISD::TexUnified1DS32Float:
3296 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3298 case NVPTXISD::TexUnified1DS32FloatLevel:
3299 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3301 case NVPTXISD::TexUnified1DS32FloatGrad:
3302 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3304 case NVPTXISD::TexUnified1DU32S32:
3305 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3307 case NVPTXISD::TexUnified1DU32Float:
3308 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3310 case NVPTXISD::TexUnified1DU32FloatLevel:
3311 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3313 case NVPTXISD::TexUnified1DU32FloatGrad:
3314 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3316 case NVPTXISD::TexUnified1DArrayFloatS32:
3317 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3319 case NVPTXISD::TexUnified1DArrayFloatFloat:
3320 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3322 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3323 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3325 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3326 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3328 case NVPTXISD::TexUnified1DArrayS32S32:
3329 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3331 case NVPTXISD::TexUnified1DArrayS32Float:
3332 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3334 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3335 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3337 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3338 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3340 case NVPTXISD::TexUnified1DArrayU32S32:
3341 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3343 case NVPTXISD::TexUnified1DArrayU32Float:
3344 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3346 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3347 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3349 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3350 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3352 case NVPTXISD::TexUnified2DFloatS32:
3353 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3355 case NVPTXISD::TexUnified2DFloatFloat:
3356 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3358 case NVPTXISD::TexUnified2DFloatFloatLevel:
3359 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3361 case NVPTXISD::TexUnified2DFloatFloatGrad:
3362 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3364 case NVPTXISD::TexUnified2DS32S32:
3365 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3367 case NVPTXISD::TexUnified2DS32Float:
3368 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3370 case NVPTXISD::TexUnified2DS32FloatLevel:
3371 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3373 case NVPTXISD::TexUnified2DS32FloatGrad:
3374 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3376 case NVPTXISD::TexUnified2DU32S32:
3377 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3379 case NVPTXISD::TexUnified2DU32Float:
3380 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3382 case NVPTXISD::TexUnified2DU32FloatLevel:
3383 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3385 case NVPTXISD::TexUnified2DU32FloatGrad:
3386 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3388 case NVPTXISD::TexUnified2DArrayFloatS32:
3389 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3391 case NVPTXISD::TexUnified2DArrayFloatFloat:
3392 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3394 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3395 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3397 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3398 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3400 case NVPTXISD::TexUnified2DArrayS32S32:
3401 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3403 case NVPTXISD::TexUnified2DArrayS32Float:
3404 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3406 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3407 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3409 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3410 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3412 case NVPTXISD::TexUnified2DArrayU32S32:
3413 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3415 case NVPTXISD::TexUnified2DArrayU32Float:
3416 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3418 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3419 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3421 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3422 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3424 case NVPTXISD::TexUnified3DFloatS32:
3425 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3427 case NVPTXISD::TexUnified3DFloatFloat:
3428 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3430 case NVPTXISD::TexUnified3DFloatFloatLevel:
3431 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3433 case NVPTXISD::TexUnified3DFloatFloatGrad:
3434 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3436 case NVPTXISD::TexUnified3DS32S32:
3437 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3439 case NVPTXISD::TexUnified3DS32Float:
3440 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3442 case NVPTXISD::TexUnified3DS32FloatLevel:
3443 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3445 case NVPTXISD::TexUnified3DS32FloatGrad:
3446 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3448 case NVPTXISD::TexUnified3DU32S32:
3449 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3451 case NVPTXISD::TexUnified3DU32Float:
3452 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3454 case NVPTXISD::TexUnified3DU32FloatLevel:
3455 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3457 case NVPTXISD::TexUnified3DU32FloatGrad:
3458 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3460 case NVPTXISD::TexUnifiedCubeFloatFloat:
3461 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3463 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3464 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3466 case NVPTXISD::TexUnifiedCubeS32Float:
3467 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3469 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3470 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3472 case NVPTXISD::TexUnifiedCubeU32Float:
3473 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3475 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3476 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3478 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3479 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3481 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3482 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3484 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3485 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3487 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3488 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3490 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3491 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3493 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3494 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3496 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3497 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3499 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3500 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3502 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3503 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3505 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3506 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3508 case NVPTXISD::Tld4UnifiedR2DS64Float:
3509 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3511 case NVPTXISD::Tld4UnifiedG2DS64Float:
3512 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3514 case NVPTXISD::Tld4UnifiedB2DS64Float:
3515 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3517 case NVPTXISD::Tld4UnifiedA2DS64Float:
3518 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3520 case NVPTXISD::Tld4UnifiedR2DU64Float:
3521 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3523 case NVPTXISD::Tld4UnifiedG2DU64Float:
3524 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3526 case NVPTXISD::Tld4UnifiedB2DU64Float:
3527 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3529 case NVPTXISD::Tld4UnifiedA2DU64Float:
3530 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3534 // Copy over operands
3535 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3536 Ops.push_back(N->getOperand(i));
3539 Ops.push_back(Chain);
3540 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3544 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3545 SDValue Chain = N->getOperand(0);
3546 SDValue TexHandle = N->getOperand(1);
3547 SDNode *Ret = nullptr;
3549 SmallVector<SDValue, 8> Ops;
3550 switch (N->getOpcode()) {
3551 default: return nullptr;
3552 case NVPTXISD::Suld1DI8Clamp:
3553 Opc = NVPTX::SULD_1D_I8_CLAMP;
3554 Ops.push_back(TexHandle);
3555 Ops.push_back(N->getOperand(2));
3556 Ops.push_back(Chain);
3558 case NVPTXISD::Suld1DI16Clamp:
3559 Opc = NVPTX::SULD_1D_I16_CLAMP;
3560 Ops.push_back(TexHandle);
3561 Ops.push_back(N->getOperand(2));
3562 Ops.push_back(Chain);
3564 case NVPTXISD::Suld1DI32Clamp:
3565 Opc = NVPTX::SULD_1D_I32_CLAMP;
3566 Ops.push_back(TexHandle);
3567 Ops.push_back(N->getOperand(2));
3568 Ops.push_back(Chain);
3570 case NVPTXISD::Suld1DI64Clamp:
3571 Opc = NVPTX::SULD_1D_I64_CLAMP;
3572 Ops.push_back(TexHandle);
3573 Ops.push_back(N->getOperand(2));
3574 Ops.push_back(Chain);
3576 case NVPTXISD::Suld1DV2I8Clamp:
3577 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3578 Ops.push_back(TexHandle);
3579 Ops.push_back(N->getOperand(2));
3580 Ops.push_back(Chain);
3582 case NVPTXISD::Suld1DV2I16Clamp:
3583 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3584 Ops.push_back(TexHandle);
3585 Ops.push_back(N->getOperand(2));
3586 Ops.push_back(Chain);
3588 case NVPTXISD::Suld1DV2I32Clamp:
3589 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3590 Ops.push_back(TexHandle);
3591 Ops.push_back(N->getOperand(2));
3592 Ops.push_back(Chain);
3594 case NVPTXISD::Suld1DV2I64Clamp:
3595 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3596 Ops.push_back(TexHandle);
3597 Ops.push_back(N->getOperand(2));
3598 Ops.push_back(Chain);
3600 case NVPTXISD::Suld1DV4I8Clamp:
3601 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3602 Ops.push_back(TexHandle);
3603 Ops.push_back(N->getOperand(2));
3604 Ops.push_back(Chain);
3606 case NVPTXISD::Suld1DV4I16Clamp:
3607 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3608 Ops.push_back(TexHandle);
3609 Ops.push_back(N->getOperand(2));
3610 Ops.push_back(Chain);
3612 case NVPTXISD::Suld1DV4I32Clamp:
3613 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3614 Ops.push_back(TexHandle);
3615 Ops.push_back(N->getOperand(2));
3616 Ops.push_back(Chain);
3618 case NVPTXISD::Suld1DArrayI8Clamp:
3619 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3620 Ops.push_back(TexHandle);
3621 Ops.push_back(N->getOperand(2));
3622 Ops.push_back(N->getOperand(3));
3623 Ops.push_back(Chain);
3625 case NVPTXISD::Suld1DArrayI16Clamp:
3626 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3627 Ops.push_back(TexHandle);
3628 Ops.push_back(N->getOperand(2));
3629 Ops.push_back(N->getOperand(3));
3630 Ops.push_back(Chain);
3632 case NVPTXISD::Suld1DArrayI32Clamp:
3633 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3634 Ops.push_back(TexHandle);
3635 Ops.push_back(N->getOperand(2));
3636 Ops.push_back(N->getOperand(3));
3637 Ops.push_back(Chain);
3639 case NVPTXISD::Suld1DArrayI64Clamp:
3640 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3641 Ops.push_back(TexHandle);
3642 Ops.push_back(N->getOperand(2));
3643 Ops.push_back(N->getOperand(3));
3644 Ops.push_back(Chain);
3646 case NVPTXISD::Suld1DArrayV2I8Clamp:
3647 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3648 Ops.push_back(TexHandle);
3649 Ops.push_back(N->getOperand(2));
3650 Ops.push_back(N->getOperand(3));
3651 Ops.push_back(Chain);
3653 case NVPTXISD::Suld1DArrayV2I16Clamp:
3654 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3655 Ops.push_back(TexHandle);
3656 Ops.push_back(N->getOperand(2));
3657 Ops.push_back(N->getOperand(3));
3658 Ops.push_back(Chain);
3660 case NVPTXISD::Suld1DArrayV2I32Clamp:
3661 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3662 Ops.push_back(TexHandle);
3663 Ops.push_back(N->getOperand(2));
3664 Ops.push_back(N->getOperand(3));
3665 Ops.push_back(Chain);
3667 case NVPTXISD::Suld1DArrayV2I64Clamp:
3668 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3669 Ops.push_back(TexHandle);
3670 Ops.push_back(N->getOperand(2));
3671 Ops.push_back(N->getOperand(3));
3672 Ops.push_back(Chain);
3674 case NVPTXISD::Suld1DArrayV4I8Clamp:
3675 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3676 Ops.push_back(TexHandle);
3677 Ops.push_back(N->getOperand(2));
3678 Ops.push_back(N->getOperand(3));
3679 Ops.push_back(Chain);
3681 case NVPTXISD::Suld1DArrayV4I16Clamp:
3682 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3683 Ops.push_back(TexHandle);
3684 Ops.push_back(N->getOperand(2));
3685 Ops.push_back(N->getOperand(3));
3686 Ops.push_back(Chain);
3688 case NVPTXISD::Suld1DArrayV4I32Clamp:
3689 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3690 Ops.push_back(TexHandle);
3691 Ops.push_back(N->getOperand(2));
3692 Ops.push_back(N->getOperand(3));
3693 Ops.push_back(Chain);
3695 case NVPTXISD::Suld2DI8Clamp:
3696 Opc = NVPTX::SULD_2D_I8_CLAMP;
3697 Ops.push_back(TexHandle);
3698 Ops.push_back(N->getOperand(2));
3699 Ops.push_back(N->getOperand(3));
3700 Ops.push_back(Chain);
3702 case NVPTXISD::Suld2DI16Clamp:
3703 Opc = NVPTX::SULD_2D_I16_CLAMP;
3704 Ops.push_back(TexHandle);
3705 Ops.push_back(N->getOperand(2));
3706 Ops.push_back(N->getOperand(3));
3707 Ops.push_back(Chain);
3709 case NVPTXISD::Suld2DI32Clamp:
3710 Opc = NVPTX::SULD_2D_I32_CLAMP;
3711 Ops.push_back(TexHandle);
3712 Ops.push_back(N->getOperand(2));
3713 Ops.push_back(N->getOperand(3));
3714 Ops.push_back(Chain);
3716 case NVPTXISD::Suld2DI64Clamp:
3717 Opc = NVPTX::SULD_2D_I64_CLAMP;
3718 Ops.push_back(TexHandle);
3719 Ops.push_back(N->getOperand(2));
3720 Ops.push_back(N->getOperand(3));
3721 Ops.push_back(Chain);
3723 case NVPTXISD::Suld2DV2I8Clamp:
3724 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3725 Ops.push_back(TexHandle);
3726 Ops.push_back(N->getOperand(2));
3727 Ops.push_back(N->getOperand(3));
3728 Ops.push_back(Chain);
3730 case NVPTXISD::Suld2DV2I16Clamp:
3731 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3732 Ops.push_back(TexHandle);
3733 Ops.push_back(N->getOperand(2));
3734 Ops.push_back(N->getOperand(3));
3735 Ops.push_back(Chain);
3737 case NVPTXISD::Suld2DV2I32Clamp:
3738 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3739 Ops.push_back(TexHandle);
3740 Ops.push_back(N->getOperand(2));
3741 Ops.push_back(N->getOperand(3));
3742 Ops.push_back(Chain);
3744 case NVPTXISD::Suld2DV2I64Clamp:
3745 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3746 Ops.push_back(TexHandle);
3747 Ops.push_back(N->getOperand(2));
3748 Ops.push_back(N->getOperand(3));
3749 Ops.push_back(Chain);
3751 case NVPTXISD::Suld2DV4I8Clamp:
3752 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3753 Ops.push_back(TexHandle);
3754 Ops.push_back(N->getOperand(2));
3755 Ops.push_back(N->getOperand(3));
3756 Ops.push_back(Chain);
3758 case NVPTXISD::Suld2DV4I16Clamp:
3759 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3760 Ops.push_back(TexHandle);
3761 Ops.push_back(N->getOperand(2));
3762 Ops.push_back(N->getOperand(3));
3763 Ops.push_back(Chain);
3765 case NVPTXISD::Suld2DV4I32Clamp:
3766 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3767 Ops.push_back(TexHandle);
3768 Ops.push_back(N->getOperand(2));
3769 Ops.push_back(N->getOperand(3));
3770 Ops.push_back(Chain);
3772 case NVPTXISD::Suld2DArrayI8Clamp:
3773 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3774 Ops.push_back(TexHandle);
3775 Ops.push_back(N->getOperand(2));
3776 Ops.push_back(N->getOperand(3));
3777 Ops.push_back(N->getOperand(4));
3778 Ops.push_back(Chain);
3780 case NVPTXISD::Suld2DArrayI16Clamp:
3781 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3782 Ops.push_back(TexHandle);
3783 Ops.push_back(N->getOperand(2));
3784 Ops.push_back(N->getOperand(3));
3785 Ops.push_back(N->getOperand(4));
3786 Ops.push_back(Chain);
3788 case NVPTXISD::Suld2DArrayI32Clamp:
3789 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3790 Ops.push_back(TexHandle);
3791 Ops.push_back(N->getOperand(2));
3792 Ops.push_back(N->getOperand(3));
3793 Ops.push_back(N->getOperand(4));
3794 Ops.push_back(Chain);
3796 case NVPTXISD::Suld2DArrayI64Clamp:
3797 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3798 Ops.push_back(TexHandle);
3799 Ops.push_back(N->getOperand(2));
3800 Ops.push_back(N->getOperand(3));
3801 Ops.push_back(N->getOperand(4));
3802 Ops.push_back(Chain);
3804 case NVPTXISD::Suld2DArrayV2I8Clamp:
3805 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3806 Ops.push_back(TexHandle);
3807 Ops.push_back(N->getOperand(2));
3808 Ops.push_back(N->getOperand(3));
3809 Ops.push_back(N->getOperand(4));
3810 Ops.push_back(Chain);
3812 case NVPTXISD::Suld2DArrayV2I16Clamp:
3813 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3814 Ops.push_back(TexHandle);
3815 Ops.push_back(N->getOperand(2));
3816 Ops.push_back(N->getOperand(3));
3817 Ops.push_back(N->getOperand(4));
3818 Ops.push_back(Chain);
3820 case NVPTXISD::Suld2DArrayV2I32Clamp:
3821 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3822 Ops.push_back(TexHandle);
3823 Ops.push_back(N->getOperand(2));
3824 Ops.push_back(N->getOperand(3));
3825 Ops.push_back(N->getOperand(4));
3826 Ops.push_back(Chain);
3828 case NVPTXISD::Suld2DArrayV2I64Clamp:
3829 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3830 Ops.push_back(TexHandle);
3831 Ops.push_back(N->getOperand(2));
3832 Ops.push_back(N->getOperand(3));
3833 Ops.push_back(N->getOperand(4));
3834 Ops.push_back(Chain);
3836 case NVPTXISD::Suld2DArrayV4I8Clamp:
3837 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3838 Ops.push_back(TexHandle);
3839 Ops.push_back(N->getOperand(2));
3840 Ops.push_back(N->getOperand(3));
3841 Ops.push_back(N->getOperand(4));
3842 Ops.push_back(Chain);
3844 case NVPTXISD::Suld2DArrayV4I16Clamp:
3845 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3846 Ops.push_back(TexHandle);
3847 Ops.push_back(N->getOperand(2));
3848 Ops.push_back(N->getOperand(3));
3849 Ops.push_back(N->getOperand(4));
3850 Ops.push_back(Chain);
3852 case NVPTXISD::Suld2DArrayV4I32Clamp:
3853 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3854 Ops.push_back(TexHandle);
3855 Ops.push_back(N->getOperand(2));
3856 Ops.push_back(N->getOperand(3));
3857 Ops.push_back(N->getOperand(4));
3858 Ops.push_back(Chain);
3860 case NVPTXISD::Suld3DI8Clamp:
3861 Opc = NVPTX::SULD_3D_I8_CLAMP;
3862 Ops.push_back(TexHandle);
3863 Ops.push_back(N->getOperand(2));
3864 Ops.push_back(N->getOperand(3));
3865 Ops.push_back(N->getOperand(4));
3866 Ops.push_back(Chain);
3868 case NVPTXISD::Suld3DI16Clamp:
3869 Opc = NVPTX::SULD_3D_I16_CLAMP;
3870 Ops.push_back(TexHandle);
3871 Ops.push_back(N->getOperand(2));
3872 Ops.push_back(N->getOperand(3));
3873 Ops.push_back(N->getOperand(4));
3874 Ops.push_back(Chain);
3876 case NVPTXISD::Suld3DI32Clamp:
3877 Opc = NVPTX::SULD_3D_I32_CLAMP;
3878 Ops.push_back(TexHandle);
3879 Ops.push_back(N->getOperand(2));
3880 Ops.push_back(N->getOperand(3));
3881 Ops.push_back(N->getOperand(4));
3882 Ops.push_back(Chain);
3884 case NVPTXISD::Suld3DI64Clamp:
3885 Opc = NVPTX::SULD_3D_I64_CLAMP;
3886 Ops.push_back(TexHandle);
3887 Ops.push_back(N->getOperand(2));
3888 Ops.push_back(N->getOperand(3));
3889 Ops.push_back(N->getOperand(4));
3890 Ops.push_back(Chain);
3892 case NVPTXISD::Suld3DV2I8Clamp:
3893 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3894 Ops.push_back(TexHandle);
3895 Ops.push_back(N->getOperand(2));
3896 Ops.push_back(N->getOperand(3));
3897 Ops.push_back(N->getOperand(4));
3898 Ops.push_back(Chain);
3900 case NVPTXISD::Suld3DV2I16Clamp:
3901 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3902 Ops.push_back(TexHandle);
3903 Ops.push_back(N->getOperand(2));
3904 Ops.push_back(N->getOperand(3));
3905 Ops.push_back(N->getOperand(4));
3906 Ops.push_back(Chain);
3908 case NVPTXISD::Suld3DV2I32Clamp:
3909 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3910 Ops.push_back(TexHandle);
3911 Ops.push_back(N->getOperand(2));
3912 Ops.push_back(N->getOperand(3));
3913 Ops.push_back(N->getOperand(4));
3914 Ops.push_back(Chain);
3916 case NVPTXISD::Suld3DV2I64Clamp:
3917 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3918 Ops.push_back(TexHandle);
3919 Ops.push_back(N->getOperand(2));
3920 Ops.push_back(N->getOperand(3));
3921 Ops.push_back(N->getOperand(4));
3922 Ops.push_back(Chain);
3924 case NVPTXISD::Suld3DV4I8Clamp:
3925 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3926 Ops.push_back(TexHandle);
3927 Ops.push_back(N->getOperand(2));
3928 Ops.push_back(N->getOperand(3));
3929 Ops.push_back(N->getOperand(4));
3930 Ops.push_back(Chain);
3932 case NVPTXISD::Suld3DV4I16Clamp:
3933 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3934 Ops.push_back(TexHandle);
3935 Ops.push_back(N->getOperand(2));
3936 Ops.push_back(N->getOperand(3));
3937 Ops.push_back(N->getOperand(4));
3938 Ops.push_back(Chain);
3940 case NVPTXISD::Suld3DV4I32Clamp:
3941 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3942 Ops.push_back(TexHandle);
3943 Ops.push_back(N->getOperand(2));
3944 Ops.push_back(N->getOperand(3));
3945 Ops.push_back(N->getOperand(4));
3946 Ops.push_back(Chain);
3948 case NVPTXISD::Suld1DI8Trap:
3949 Opc = NVPTX::SULD_1D_I8_TRAP;
3950 Ops.push_back(TexHandle);
3951 Ops.push_back(N->getOperand(2));
3952 Ops.push_back(Chain);
3954 case NVPTXISD::Suld1DI16Trap:
3955 Opc = NVPTX::SULD_1D_I16_TRAP;
3956 Ops.push_back(TexHandle);
3957 Ops.push_back(N->getOperand(2));
3958 Ops.push_back(Chain);
3960 case NVPTXISD::Suld1DI32Trap:
3961 Opc = NVPTX::SULD_1D_I32_TRAP;
3962 Ops.push_back(TexHandle);
3963 Ops.push_back(N->getOperand(2));
3964 Ops.push_back(Chain);
3966 case NVPTXISD::Suld1DI64Trap:
3967 Opc = NVPTX::SULD_1D_I64_TRAP;
3968 Ops.push_back(TexHandle);
3969 Ops.push_back(N->getOperand(2));
3970 Ops.push_back(Chain);
3972 case NVPTXISD::Suld1DV2I8Trap:
3973 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3974 Ops.push_back(TexHandle);
3975 Ops.push_back(N->getOperand(2));
3976 Ops.push_back(Chain);
3978 case NVPTXISD::Suld1DV2I16Trap:
3979 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3980 Ops.push_back(TexHandle);
3981 Ops.push_back(N->getOperand(2));
3982 Ops.push_back(Chain);
3984 case NVPTXISD::Suld1DV2I32Trap:
3985 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3986 Ops.push_back(TexHandle);
3987 Ops.push_back(N->getOperand(2));
3988 Ops.push_back(Chain);
3990 case NVPTXISD::Suld1DV2I64Trap:
3991 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3992 Ops.push_back(TexHandle);
3993 Ops.push_back(N->getOperand(2));
3994 Ops.push_back(Chain);
3996 case NVPTXISD::Suld1DV4I8Trap:
3997 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3998 Ops.push_back(TexHandle);
3999 Ops.push_back(N->getOperand(2));
4000 Ops.push_back(Chain);
4002 case NVPTXISD::Suld1DV4I16Trap:
4003 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4004 Ops.push_back(TexHandle);
4005 Ops.push_back(N->getOperand(2));
4006 Ops.push_back(Chain);
4008 case NVPTXISD::Suld1DV4I32Trap:
4009 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4010 Ops.push_back(TexHandle);
4011 Ops.push_back(N->getOperand(2));
4012 Ops.push_back(Chain);
4014 case NVPTXISD::Suld1DArrayI8Trap:
4015 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4016 Ops.push_back(TexHandle);
4017 Ops.push_back(N->getOperand(2));
4018 Ops.push_back(N->getOperand(3));
4019 Ops.push_back(Chain);
4021 case NVPTXISD::Suld1DArrayI16Trap:
4022 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4023 Ops.push_back(TexHandle);
4024 Ops.push_back(N->getOperand(2));
4025 Ops.push_back(N->getOperand(3));
4026 Ops.push_back(Chain);
4028 case NVPTXISD::Suld1DArrayI32Trap:
4029 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4030 Ops.push_back(TexHandle);
4031 Ops.push_back(N->getOperand(2));
4032 Ops.push_back(N->getOperand(3));
4033 Ops.push_back(Chain);
4035 case NVPTXISD::Suld1DArrayI64Trap:
4036 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4037 Ops.push_back(TexHandle);
4038 Ops.push_back(N->getOperand(2));
4039 Ops.push_back(N->getOperand(3));
4040 Ops.push_back(Chain);
4042 case NVPTXISD::Suld1DArrayV2I8Trap:
4043 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4044 Ops.push_back(TexHandle);
4045 Ops.push_back(N->getOperand(2));
4046 Ops.push_back(N->getOperand(3));
4047 Ops.push_back(Chain);
4049 case NVPTXISD::Suld1DArrayV2I16Trap:
4050 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4051 Ops.push_back(TexHandle);
4052 Ops.push_back(N->getOperand(2));
4053 Ops.push_back(N->getOperand(3));
4054 Ops.push_back(Chain);
4056 case NVPTXISD::Suld1DArrayV2I32Trap:
4057 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4058 Ops.push_back(TexHandle);
4059 Ops.push_back(N->getOperand(2));
4060 Ops.push_back(N->getOperand(3));
4061 Ops.push_back(Chain);
4063 case NVPTXISD::Suld1DArrayV2I64Trap:
4064 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4065 Ops.push_back(TexHandle);
4066 Ops.push_back(N->getOperand(2));
4067 Ops.push_back(N->getOperand(3));
4068 Ops.push_back(Chain);
4070 case NVPTXISD::Suld1DArrayV4I8Trap:
4071 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4072 Ops.push_back(TexHandle);
4073 Ops.push_back(N->getOperand(2));
4074 Ops.push_back(N->getOperand(3));
4075 Ops.push_back(Chain);
4077 case NVPTXISD::Suld1DArrayV4I16Trap:
4078 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4079 Ops.push_back(TexHandle);
4080 Ops.push_back(N->getOperand(2));
4081 Ops.push_back(N->getOperand(3));
4082 Ops.push_back(Chain);
4084 case NVPTXISD::Suld1DArrayV4I32Trap:
4085 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4086 Ops.push_back(TexHandle);
4087 Ops.push_back(N->getOperand(2));
4088 Ops.push_back(N->getOperand(3));
4089 Ops.push_back(Chain);
4091 case NVPTXISD::Suld2DI8Trap:
4092 Opc = NVPTX::SULD_2D_I8_TRAP;
4093 Ops.push_back(TexHandle);
4094 Ops.push_back(N->getOperand(2));
4095 Ops.push_back(N->getOperand(3));
4096 Ops.push_back(Chain);
4098 case NVPTXISD::Suld2DI16Trap:
4099 Opc = NVPTX::SULD_2D_I16_TRAP;
4100 Ops.push_back(TexHandle);
4101 Ops.push_back(N->getOperand(2));
4102 Ops.push_back(N->getOperand(3));
4103 Ops.push_back(Chain);
4105 case NVPTXISD::Suld2DI32Trap:
4106 Opc = NVPTX::SULD_2D_I32_TRAP;
4107 Ops.push_back(TexHandle);
4108 Ops.push_back(N->getOperand(2));
4109 Ops.push_back(N->getOperand(3));
4110 Ops.push_back(Chain);
4112 case NVPTXISD::Suld2DI64Trap:
4113 Opc = NVPTX::SULD_2D_I64_TRAP;
4114 Ops.push_back(TexHandle);
4115 Ops.push_back(N->getOperand(2));
4116 Ops.push_back(N->getOperand(3));
4117 Ops.push_back(Chain);
4119 case NVPTXISD::Suld2DV2I8Trap:
4120 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4121 Ops.push_back(TexHandle);
4122 Ops.push_back(N->getOperand(2));
4123 Ops.push_back(N->getOperand(3));
4124 Ops.push_back(Chain);
4126 case NVPTXISD::Suld2DV2I16Trap:
4127 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4128 Ops.push_back(TexHandle);
4129 Ops.push_back(N->getOperand(2));
4130 Ops.push_back(N->getOperand(3));
4131 Ops.push_back(Chain);
4133 case NVPTXISD::Suld2DV2I32Trap:
4134 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4135 Ops.push_back(TexHandle);
4136 Ops.push_back(N->getOperand(2));
4137 Ops.push_back(N->getOperand(3));
4138 Ops.push_back(Chain);
4140 case NVPTXISD::Suld2DV2I64Trap:
4141 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4142 Ops.push_back(TexHandle);
4143 Ops.push_back(N->getOperand(2));
4144 Ops.push_back(N->getOperand(3));
4145 Ops.push_back(Chain);
4147 case NVPTXISD::Suld2DV4I8Trap:
4148 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4149 Ops.push_back(TexHandle);
4150 Ops.push_back(N->getOperand(2));
4151 Ops.push_back(N->getOperand(3));
4152 Ops.push_back(Chain);
4154 case NVPTXISD::Suld2DV4I16Trap:
4155 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4156 Ops.push_back(TexHandle);
4157 Ops.push_back(N->getOperand(2));
4158 Ops.push_back(N->getOperand(3));
4159 Ops.push_back(Chain);
4161 case NVPTXISD::Suld2DV4I32Trap:
4162 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4163 Ops.push_back(TexHandle);
4164 Ops.push_back(N->getOperand(2));
4165 Ops.push_back(N->getOperand(3));
4166 Ops.push_back(Chain);
4168 case NVPTXISD::Suld2DArrayI8Trap:
4169 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4170 Ops.push_back(TexHandle);
4171 Ops.push_back(N->getOperand(2));
4172 Ops.push_back(N->getOperand(3));
4173 Ops.push_back(N->getOperand(4));
4174 Ops.push_back(Chain);
4176 case NVPTXISD::Suld2DArrayI16Trap:
4177 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4178 Ops.push_back(TexHandle);
4179 Ops.push_back(N->getOperand(2));
4180 Ops.push_back(N->getOperand(3));
4181 Ops.push_back(N->getOperand(4));
4182 Ops.push_back(Chain);
4184 case NVPTXISD::Suld2DArrayI32Trap:
4185 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4186 Ops.push_back(TexHandle);
4187 Ops.push_back(N->getOperand(2));
4188 Ops.push_back(N->getOperand(3));
4189 Ops.push_back(N->getOperand(4));
4190 Ops.push_back(Chain);
4192 case NVPTXISD::Suld2DArrayI64Trap:
4193 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4194 Ops.push_back(TexHandle);
4195 Ops.push_back(N->getOperand(2));
4196 Ops.push_back(N->getOperand(3));
4197 Ops.push_back(N->getOperand(4));
4198 Ops.push_back(Chain);
4200 case NVPTXISD::Suld2DArrayV2I8Trap:
4201 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4202 Ops.push_back(TexHandle);
4203 Ops.push_back(N->getOperand(2));
4204 Ops.push_back(N->getOperand(3));
4205 Ops.push_back(N->getOperand(4));
4206 Ops.push_back(Chain);
4208 case NVPTXISD::Suld2DArrayV2I16Trap:
4209 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4210 Ops.push_back(TexHandle);
4211 Ops.push_back(N->getOperand(2));
4212 Ops.push_back(N->getOperand(3));
4213 Ops.push_back(N->getOperand(4));
4214 Ops.push_back(Chain);
4216 case NVPTXISD::Suld2DArrayV2I32Trap:
4217 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4218 Ops.push_back(TexHandle);
4219 Ops.push_back(N->getOperand(2));
4220 Ops.push_back(N->getOperand(3));
4221 Ops.push_back(N->getOperand(4));
4222 Ops.push_back(Chain);
4224 case NVPTXISD::Suld2DArrayV2I64Trap:
4225 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4226 Ops.push_back(TexHandle);
4227 Ops.push_back(N->getOperand(2));
4228 Ops.push_back(N->getOperand(3));
4229 Ops.push_back(N->getOperand(4));
4230 Ops.push_back(Chain);
4232 case NVPTXISD::Suld2DArrayV4I8Trap:
4233 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4234 Ops.push_back(TexHandle);
4235 Ops.push_back(N->getOperand(2));
4236 Ops.push_back(N->getOperand(3));
4237 Ops.push_back(N->getOperand(4));
4238 Ops.push_back(Chain);
4240 case NVPTXISD::Suld2DArrayV4I16Trap:
4241 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4242 Ops.push_back(TexHandle);
4243 Ops.push_back(N->getOperand(2));
4244 Ops.push_back(N->getOperand(3));
4245 Ops.push_back(N->getOperand(4));
4246 Ops.push_back(Chain);
4248 case NVPTXISD::Suld2DArrayV4I32Trap:
4249 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4250 Ops.push_back(TexHandle);
4251 Ops.push_back(N->getOperand(2));
4252 Ops.push_back(N->getOperand(3));
4253 Ops.push_back(N->getOperand(4));
4254 Ops.push_back(Chain);
4256 case NVPTXISD::Suld3DI8Trap:
4257 Opc = NVPTX::SULD_3D_I8_TRAP;
4258 Ops.push_back(TexHandle);
4259 Ops.push_back(N->getOperand(2));
4260 Ops.push_back(N->getOperand(3));
4261 Ops.push_back(N->getOperand(4));
4262 Ops.push_back(Chain);
4264 case NVPTXISD::Suld3DI16Trap:
4265 Opc = NVPTX::SULD_3D_I16_TRAP;
4266 Ops.push_back(TexHandle);
4267 Ops.push_back(N->getOperand(2));
4268 Ops.push_back(N->getOperand(3));
4269 Ops.push_back(N->getOperand(4));
4270 Ops.push_back(Chain);
4272 case NVPTXISD::Suld3DI32Trap:
4273 Opc = NVPTX::SULD_3D_I32_TRAP;
4274 Ops.push_back(TexHandle);
4275 Ops.push_back(N->getOperand(2));
4276 Ops.push_back(N->getOperand(3));
4277 Ops.push_back(N->getOperand(4));
4278 Ops.push_back(Chain);
4280 case NVPTXISD::Suld3DI64Trap:
4281 Opc = NVPTX::SULD_3D_I64_TRAP;
4282 Ops.push_back(TexHandle);
4283 Ops.push_back(N->getOperand(2));
4284 Ops.push_back(N->getOperand(3));
4285 Ops.push_back(N->getOperand(4));
4286 Ops.push_back(Chain);
4288 case NVPTXISD::Suld3DV2I8Trap:
4289 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4290 Ops.push_back(TexHandle);
4291 Ops.push_back(N->getOperand(2));
4292 Ops.push_back(N->getOperand(3));
4293 Ops.push_back(N->getOperand(4));
4294 Ops.push_back(Chain);
4296 case NVPTXISD::Suld3DV2I16Trap:
4297 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4298 Ops.push_back(TexHandle);
4299 Ops.push_back(N->getOperand(2));
4300 Ops.push_back(N->getOperand(3));
4301 Ops.push_back(N->getOperand(4));
4302 Ops.push_back(Chain);
4304 case NVPTXISD::Suld3DV2I32Trap:
4305 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4306 Ops.push_back(TexHandle);
4307 Ops.push_back(N->getOperand(2));
4308 Ops.push_back(N->getOperand(3));
4309 Ops.push_back(N->getOperand(4));
4310 Ops.push_back(Chain);
4312 case NVPTXISD::Suld3DV2I64Trap:
4313 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4314 Ops.push_back(TexHandle);
4315 Ops.push_back(N->getOperand(2));
4316 Ops.push_back(N->getOperand(3));
4317 Ops.push_back(N->getOperand(4));
4318 Ops.push_back(Chain);
4320 case NVPTXISD::Suld3DV4I8Trap:
4321 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4322 Ops.push_back(TexHandle);
4323 Ops.push_back(N->getOperand(2));
4324 Ops.push_back(N->getOperand(3));
4325 Ops.push_back(N->getOperand(4));
4326 Ops.push_back(Chain);
4328 case NVPTXISD::Suld3DV4I16Trap:
4329 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4330 Ops.push_back(TexHandle);
4331 Ops.push_back(N->getOperand(2));
4332 Ops.push_back(N->getOperand(3));
4333 Ops.push_back(N->getOperand(4));
4334 Ops.push_back(Chain);
4336 case NVPTXISD::Suld3DV4I32Trap:
4337 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4338 Ops.push_back(TexHandle);
4339 Ops.push_back(N->getOperand(2));
4340 Ops.push_back(N->getOperand(3));
4341 Ops.push_back(N->getOperand(4));
4342 Ops.push_back(Chain);
4344 case NVPTXISD::Suld1DI8Zero:
4345 Opc = NVPTX::SULD_1D_I8_ZERO;
4346 Ops.push_back(TexHandle);
4347 Ops.push_back(N->getOperand(2));
4348 Ops.push_back(Chain);
4350 case NVPTXISD::Suld1DI16Zero:
4351 Opc = NVPTX::SULD_1D_I16_ZERO;
4352 Ops.push_back(TexHandle);
4353 Ops.push_back(N->getOperand(2));
4354 Ops.push_back(Chain);
4356 case NVPTXISD::Suld1DI32Zero:
4357 Opc = NVPTX::SULD_1D_I32_ZERO;
4358 Ops.push_back(TexHandle);
4359 Ops.push_back(N->getOperand(2));
4360 Ops.push_back(Chain);
4362 case NVPTXISD::Suld1DI64Zero:
4363 Opc = NVPTX::SULD_1D_I64_ZERO;
4364 Ops.push_back(TexHandle);
4365 Ops.push_back(N->getOperand(2));
4366 Ops.push_back(Chain);
4368 case NVPTXISD::Suld1DV2I8Zero:
4369 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4370 Ops.push_back(TexHandle);
4371 Ops.push_back(N->getOperand(2));
4372 Ops.push_back(Chain);
4374 case NVPTXISD::Suld1DV2I16Zero:
4375 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4376 Ops.push_back(TexHandle);
4377 Ops.push_back(N->getOperand(2));
4378 Ops.push_back(Chain);
4380 case NVPTXISD::Suld1DV2I32Zero:
4381 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4382 Ops.push_back(TexHandle);
4383 Ops.push_back(N->getOperand(2));
4384 Ops.push_back(Chain);
4386 case NVPTXISD::Suld1DV2I64Zero:
4387 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4388 Ops.push_back(TexHandle);
4389 Ops.push_back(N->getOperand(2));
4390 Ops.push_back(Chain);
4392 case NVPTXISD::Suld1DV4I8Zero:
4393 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4394 Ops.push_back(TexHandle);
4395 Ops.push_back(N->getOperand(2));
4396 Ops.push_back(Chain);
4398 case NVPTXISD::Suld1DV4I16Zero:
4399 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4400 Ops.push_back(TexHandle);
4401 Ops.push_back(N->getOperand(2));
4402 Ops.push_back(Chain);
4404 case NVPTXISD::Suld1DV4I32Zero:
4405 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4406 Ops.push_back(TexHandle);
4407 Ops.push_back(N->getOperand(2));
4408 Ops.push_back(Chain);
4410 case NVPTXISD::Suld1DArrayI8Zero:
4411 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4412 Ops.push_back(TexHandle);
4413 Ops.push_back(N->getOperand(2));
4414 Ops.push_back(N->getOperand(3));
4415 Ops.push_back(Chain);
4417 case NVPTXISD::Suld1DArrayI16Zero:
4418 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4419 Ops.push_back(TexHandle);
4420 Ops.push_back(N->getOperand(2));
4421 Ops.push_back(N->getOperand(3));
4422 Ops.push_back(Chain);
4424 case NVPTXISD::Suld1DArrayI32Zero:
4425 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4426 Ops.push_back(TexHandle);
4427 Ops.push_back(N->getOperand(2));
4428 Ops.push_back(N->getOperand(3));
4429 Ops.push_back(Chain);
4431 case NVPTXISD::Suld1DArrayI64Zero:
4432 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4433 Ops.push_back(TexHandle);
4434 Ops.push_back(N->getOperand(2));
4435 Ops.push_back(N->getOperand(3));
4436 Ops.push_back(Chain);
4438 case NVPTXISD::Suld1DArrayV2I8Zero:
4439 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4440 Ops.push_back(TexHandle);
4441 Ops.push_back(N->getOperand(2));
4442 Ops.push_back(N->getOperand(3));
4443 Ops.push_back(Chain);
4445 case NVPTXISD::Suld1DArrayV2I16Zero:
4446 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4447 Ops.push_back(TexHandle);
4448 Ops.push_back(N->getOperand(2));
4449 Ops.push_back(N->getOperand(3));
4450 Ops.push_back(Chain);
4452 case NVPTXISD::Suld1DArrayV2I32Zero:
4453 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4454 Ops.push_back(TexHandle);
4455 Ops.push_back(N->getOperand(2));
4456 Ops.push_back(N->getOperand(3));
4457 Ops.push_back(Chain);
4459 case NVPTXISD::Suld1DArrayV2I64Zero:
4460 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4461 Ops.push_back(TexHandle);
4462 Ops.push_back(N->getOperand(2));
4463 Ops.push_back(N->getOperand(3));
4464 Ops.push_back(Chain);
4466 case NVPTXISD::Suld1DArrayV4I8Zero:
4467 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4468 Ops.push_back(TexHandle);
4469 Ops.push_back(N->getOperand(2));
4470 Ops.push_back(N->getOperand(3));
4471 Ops.push_back(Chain);
4473 case NVPTXISD::Suld1DArrayV4I16Zero:
4474 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4475 Ops.push_back(TexHandle);
4476 Ops.push_back(N->getOperand(2));
4477 Ops.push_back(N->getOperand(3));
4478 Ops.push_back(Chain);
4480 case NVPTXISD::Suld1DArrayV4I32Zero:
4481 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4482 Ops.push_back(TexHandle);
4483 Ops.push_back(N->getOperand(2));
4484 Ops.push_back(N->getOperand(3));
4485 Ops.push_back(Chain);
4487 case NVPTXISD::Suld2DI8Zero:
4488 Opc = NVPTX::SULD_2D_I8_ZERO;
4489 Ops.push_back(TexHandle);
4490 Ops.push_back(N->getOperand(2));
4491 Ops.push_back(N->getOperand(3));
4492 Ops.push_back(Chain);
4494 case NVPTXISD::Suld2DI16Zero:
4495 Opc = NVPTX::SULD_2D_I16_ZERO;
4496 Ops.push_back(TexHandle);
4497 Ops.push_back(N->getOperand(2));
4498 Ops.push_back(N->getOperand(3));
4499 Ops.push_back(Chain);
4501 case NVPTXISD::Suld2DI32Zero:
4502 Opc = NVPTX::SULD_2D_I32_ZERO;
4503 Ops.push_back(TexHandle);
4504 Ops.push_back(N->getOperand(2));
4505 Ops.push_back(N->getOperand(3));
4506 Ops.push_back(Chain);
4508 case NVPTXISD::Suld2DI64Zero:
4509 Opc = NVPTX::SULD_2D_I64_ZERO;
4510 Ops.push_back(TexHandle);
4511 Ops.push_back(N->getOperand(2));
4512 Ops.push_back(N->getOperand(3));
4513 Ops.push_back(Chain);
4515 case NVPTXISD::Suld2DV2I8Zero:
4516 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4517 Ops.push_back(TexHandle);
4518 Ops.push_back(N->getOperand(2));
4519 Ops.push_back(N->getOperand(3));
4520 Ops.push_back(Chain);
4522 case NVPTXISD::Suld2DV2I16Zero:
4523 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4524 Ops.push_back(TexHandle);
4525 Ops.push_back(N->getOperand(2));
4526 Ops.push_back(N->getOperand(3));
4527 Ops.push_back(Chain);
4529 case NVPTXISD::Suld2DV2I32Zero:
4530 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4531 Ops.push_back(TexHandle);
4532 Ops.push_back(N->getOperand(2));
4533 Ops.push_back(N->getOperand(3));
4534 Ops.push_back(Chain);
4536 case NVPTXISD::Suld2DV2I64Zero:
4537 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4538 Ops.push_back(TexHandle);
4539 Ops.push_back(N->getOperand(2));
4540 Ops.push_back(N->getOperand(3));
4541 Ops.push_back(Chain);
4543 case NVPTXISD::Suld2DV4I8Zero:
4544 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4545 Ops.push_back(TexHandle);
4546 Ops.push_back(N->getOperand(2));
4547 Ops.push_back(N->getOperand(3));
4548 Ops.push_back(Chain);
4550 case NVPTXISD::Suld2DV4I16Zero:
4551 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4552 Ops.push_back(TexHandle);
4553 Ops.push_back(N->getOperand(2));
4554 Ops.push_back(N->getOperand(3));
4555 Ops.push_back(Chain);
4557 case NVPTXISD::Suld2DV4I32Zero:
4558 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4559 Ops.push_back(TexHandle);
4560 Ops.push_back(N->getOperand(2));
4561 Ops.push_back(N->getOperand(3));
4562 Ops.push_back(Chain);
4564 case NVPTXISD::Suld2DArrayI8Zero:
4565 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4566 Ops.push_back(TexHandle);
4567 Ops.push_back(N->getOperand(2));
4568 Ops.push_back(N->getOperand(3));
4569 Ops.push_back(N->getOperand(4));
4570 Ops.push_back(Chain);
4572 case NVPTXISD::Suld2DArrayI16Zero:
4573 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4574 Ops.push_back(TexHandle);
4575 Ops.push_back(N->getOperand(2));
4576 Ops.push_back(N->getOperand(3));
4577 Ops.push_back(N->getOperand(4));
4578 Ops.push_back(Chain);
4580 case NVPTXISD::Suld2DArrayI32Zero:
4581 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4582 Ops.push_back(TexHandle);
4583 Ops.push_back(N->getOperand(2));
4584 Ops.push_back(N->getOperand(3));
4585 Ops.push_back(N->getOperand(4));
4586 Ops.push_back(Chain);
4588 case NVPTXISD::Suld2DArrayI64Zero:
4589 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4590 Ops.push_back(TexHandle);
4591 Ops.push_back(N->getOperand(2));
4592 Ops.push_back(N->getOperand(3));
4593 Ops.push_back(N->getOperand(4));
4594 Ops.push_back(Chain);
4596 case NVPTXISD::Suld2DArrayV2I8Zero:
4597 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4598 Ops.push_back(TexHandle);
4599 Ops.push_back(N->getOperand(2));
4600 Ops.push_back(N->getOperand(3));
4601 Ops.push_back(N->getOperand(4));
4602 Ops.push_back(Chain);
4604 case NVPTXISD::Suld2DArrayV2I16Zero:
4605 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4606 Ops.push_back(TexHandle);
4607 Ops.push_back(N->getOperand(2));
4608 Ops.push_back(N->getOperand(3));
4609 Ops.push_back(N->getOperand(4));
4610 Ops.push_back(Chain);
4612 case NVPTXISD::Suld2DArrayV2I32Zero:
4613 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4614 Ops.push_back(TexHandle);
4615 Ops.push_back(N->getOperand(2));
4616 Ops.push_back(N->getOperand(3));
4617 Ops.push_back(N->getOperand(4));
4618 Ops.push_back(Chain);
4620 case NVPTXISD::Suld2DArrayV2I64Zero:
4621 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4622 Ops.push_back(TexHandle);
4623 Ops.push_back(N->getOperand(2));
4624 Ops.push_back(N->getOperand(3));
4625 Ops.push_back(N->getOperand(4));
4626 Ops.push_back(Chain);
4628 case NVPTXISD::Suld2DArrayV4I8Zero:
4629 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4630 Ops.push_back(TexHandle);
4631 Ops.push_back(N->getOperand(2));
4632 Ops.push_back(N->getOperand(3));
4633 Ops.push_back(N->getOperand(4));
4634 Ops.push_back(Chain);
4636 case NVPTXISD::Suld2DArrayV4I16Zero:
4637 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4638 Ops.push_back(TexHandle);
4639 Ops.push_back(N->getOperand(2));
4640 Ops.push_back(N->getOperand(3));
4641 Ops.push_back(N->getOperand(4));
4642 Ops.push_back(Chain);
4644 case NVPTXISD::Suld2DArrayV4I32Zero:
4645 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4646 Ops.push_back(TexHandle);
4647 Ops.push_back(N->getOperand(2));
4648 Ops.push_back(N->getOperand(3));
4649 Ops.push_back(N->getOperand(4));
4650 Ops.push_back(Chain);
4652 case NVPTXISD::Suld3DI8Zero:
4653 Opc = NVPTX::SULD_3D_I8_ZERO;
4654 Ops.push_back(TexHandle);
4655 Ops.push_back(N->getOperand(2));
4656 Ops.push_back(N->getOperand(3));
4657 Ops.push_back(N->getOperand(4));
4658 Ops.push_back(Chain);
4660 case NVPTXISD::Suld3DI16Zero:
4661 Opc = NVPTX::SULD_3D_I16_ZERO;
4662 Ops.push_back(TexHandle);
4663 Ops.push_back(N->getOperand(2));
4664 Ops.push_back(N->getOperand(3));
4665 Ops.push_back(N->getOperand(4));
4666 Ops.push_back(Chain);
4668 case NVPTXISD::Suld3DI32Zero:
4669 Opc = NVPTX::SULD_3D_I32_ZERO;
4670 Ops.push_back(TexHandle);
4671 Ops.push_back(N->getOperand(2));
4672 Ops.push_back(N->getOperand(3));
4673 Ops.push_back(N->getOperand(4));
4674 Ops.push_back(Chain);
4676 case NVPTXISD::Suld3DI64Zero:
4677 Opc = NVPTX::SULD_3D_I64_ZERO;
4678 Ops.push_back(TexHandle);
4679 Ops.push_back(N->getOperand(2));
4680 Ops.push_back(N->getOperand(3));
4681 Ops.push_back(N->getOperand(4));
4682 Ops.push_back(Chain);
4684 case NVPTXISD::Suld3DV2I8Zero:
4685 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4686 Ops.push_back(TexHandle);
4687 Ops.push_back(N->getOperand(2));
4688 Ops.push_back(N->getOperand(3));
4689 Ops.push_back(N->getOperand(4));
4690 Ops.push_back(Chain);
4692 case NVPTXISD::Suld3DV2I16Zero:
4693 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4694 Ops.push_back(TexHandle);
4695 Ops.push_back(N->getOperand(2));
4696 Ops.push_back(N->getOperand(3));
4697 Ops.push_back(N->getOperand(4));
4698 Ops.push_back(Chain);
4700 case NVPTXISD::Suld3DV2I32Zero:
4701 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4702 Ops.push_back(TexHandle);
4703 Ops.push_back(N->getOperand(2));
4704 Ops.push_back(N->getOperand(3));
4705 Ops.push_back(N->getOperand(4));
4706 Ops.push_back(Chain);
4708 case NVPTXISD::Suld3DV2I64Zero:
4709 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4710 Ops.push_back(TexHandle);
4711 Ops.push_back(N->getOperand(2));
4712 Ops.push_back(N->getOperand(3));
4713 Ops.push_back(N->getOperand(4));
4714 Ops.push_back(Chain);
4716 case NVPTXISD::Suld3DV4I8Zero:
4717 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4718 Ops.push_back(TexHandle);
4719 Ops.push_back(N->getOperand(2));
4720 Ops.push_back(N->getOperand(3));
4721 Ops.push_back(N->getOperand(4));
4722 Ops.push_back(Chain);
4724 case NVPTXISD::Suld3DV4I16Zero:
4725 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4726 Ops.push_back(TexHandle);
4727 Ops.push_back(N->getOperand(2));
4728 Ops.push_back(N->getOperand(3));
4729 Ops.push_back(N->getOperand(4));
4730 Ops.push_back(Chain);
4732 case NVPTXISD::Suld3DV4I32Zero:
4733 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4734 Ops.push_back(TexHandle);
4735 Ops.push_back(N->getOperand(2));
4736 Ops.push_back(N->getOperand(3));
4737 Ops.push_back(N->getOperand(4));
4738 Ops.push_back(Chain);
4741 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4746 /// SelectBFE - Look for instruction sequences that can be made more efficient
4747 /// by using the 'bfe' (bit-field extract) PTX instruction.
///
/// Patterns examined by the code below:
///   - (and (srl|sra val, start), mask) where mask satisfies isMask_64,
///   - (srl|sra (and val, mask), shift) where mask satisfies isMask_64 or
///     isShiftedMask_64,
///   - (srl|sra (shl val, NN), MM) with constant NN and MM.
/// For an accepted pattern a BFE_{S,U}{32,64}rii machine node is created with
/// N's value type list; only i32 and i64 extracted values get an opcode.
4748 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// The two operands of N; in the AND case they may be swapped below so that the
// constant ends up in RHS.
4749 SDValue LHS = N->getOperand(0);
4750 SDValue RHS = N->getOperand(1);
// NOTE(review): presumably switches the opcode choice at the end to the signed
// BFE variants for the sra-of-shl pattern; the statements that set and test
// this flag are not visible here -- confirm.
4754 bool IsSigned = false;
4756 if (N->getOpcode() == ISD::AND) {
4757 // Canonicalize the operands
4758 // We want 'and %val, %mask'
4759 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4760 std::swap(LHS, RHS);
4763 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4765 // We need a constant mask on the RHS of the AND
4769 // Extract the mask bits
4770 uint64_t MaskVal = Mask->getZExtValue();
4771 if (!isMask_64(MaskVal)) {
4772 // We *could* handle shifted masks here, but doing so would require an
4773 // 'and' operation to fix up the low-order bits so we would trade
4774 // shr+and for bfe+and, which has the same throughput
4778 // How many bits are in our mask?
4779 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
// The field length is the number of trailing one bits in the mask.
4780 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4782 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4783 // We have a 'srl/and' pair, extract the effective start bit and length
4784 Val = LHS.getNode()->getOperand(0);
4785 Start = LHS.getNode()->getOperand(1);
4786 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4788 uint64_t StartVal = StartConst->getZExtValue();
4789 // How many "good" bits do we have left? "good" is defined here as bits
4790 // that exist in the original value, not shifted in.
// NOTE(review): the width used here is that of the shift-amount operand
// (Start), not of the shifted value (Val); confirm this is intended when the
// two types differ.
4791 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4792 if (NumBits > GoodBits) {
4793 // Do not handle the case where bits have been shifted in. In theory
4794 // we could handle this, but the cost is likely higher than just
4795 // emitting the srl/and pair.
// Replace the shift-amount operand with an i32 target constant of equal value.
4798 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
4800 // Do not handle the case where the shift amount (can be zero if no srl
4801 // was found) is not constant. We could handle this case, but it would
4802 // require run-time logic that would be more expensive than just
4803 // emitting the srl/and pair.
4807 // Do not handle the case where the LHS of the and is not a shift. While
4808 // it would be trivial to handle this case, it would just transform
4809 // 'and' -> 'bfe', but 'and' has higher throughput.
4812 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4813 if (LHS->getOpcode() == ISD::AND) {
4814 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4816 // Shift amount must be constant
4820 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4822 SDValue AndLHS = LHS->getOperand(0);
4823 SDValue AndRHS = LHS->getOperand(1);
4825 // Canonicalize the AND to have the mask on the RHS
4826 if (isa<ConstantSDNode>(AndLHS)) {
4827 std::swap(AndLHS, AndRHS);
4830 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4832 // Mask must be constant
4836 uint64_t MaskVal = MaskCnst->getZExtValue();
4839 if (isMask_64(MaskVal)) {
4841 // The number of bits in the result bitfield will be the number of
4842 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): ShiftAmt is uint64_t, so this subtraction wraps if ShiftAmt
// exceeds the number of trailing ones; no guard is visible here -- confirm.
4843 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
4844 } else if (isShiftedMask_64(MaskVal)) {
4845 NumZeros = countTrailingZeros(MaskVal);
4846 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
4847 // The number of bits in the result bitfield will be the number of
4848 // trailing zeros plus the number of set bits in the mask minus the
4849 // number of bits we shift off
4850 NumBits = NumZeros + NumOnes - ShiftAmt;
4852 // This is not a mask we can handle
4856 if (ShiftAmt < NumZeros) {
4857 // Handling this case would require extra logic that would make this
4858 // transformation non-profitable
// Extraction starts at the shift amount and spans NumBits bits.
4863 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
4864 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
4865 } else if (LHS->getOpcode() == ISD::SHL) {
4866 // Here, we have a pattern like:
4868 // (sra (shl val, NN), MM)
4870 // (srl (shl val, NN), MM)
4872 // If MM >= NN, we can efficiently optimize this with bfe
4873 Val = LHS->getOperand(0);
4875 SDValue ShlRHS = LHS->getOperand(1);
4876 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4878 // Shift amount must be constant
4881 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4883 SDValue ShrRHS = RHS;
4884 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4886 // Shift amount must be constant
4889 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4891 // To avoid extra codegen and be profitable, we need Outer >= Inner
4892 if (OuterShiftAmt < InnerShiftAmt) {
4896 // If the outer shift is more than the type size, we have no bitfield to
4897 // extract (since we also check that the inner shift is <= the outer shift
4898 // then this also implies that the inner shift is < the type size)
4899 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
// Two i32 target constants: (outer - inner) shift, then (type width - outer
// shift). NOTE(review): presumably these become Start and Len respectively;
// the assignment targets are not visible here -- confirm.
4904 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
4906 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4907 OuterShiftAmt, MVT::i32);
4909 if (N->getOpcode() == ISD::SRA) {
4910 // If we have an arithmetic right shift, we need to use the signed bfe
4925 // For the BFE operations we form here from "and" and "srl", always use the
4926 // unsigned variants.
// The opcode is chosen from the type of the extracted value (i32 or i64).
4927 if (Val.getValueType() == MVT::i32) {
4929 Opc = NVPTX::BFE_S32rii;
4931 Opc = NVPTX::BFE_U32rii;
4933 } else if (Val.getValueType() == MVT::i64) {
4935 Opc = NVPTX::BFE_S64rii;
4937 Opc = NVPTX::BFE_U64rii;
4940 // We cannot handle this type
// Build the BFE machine node at N's debug location, reusing N's result types.
4949 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4954 // SelectDirectAddr - Match a direct address for DAG.
4955 // A direct address could be a globaladdress or externalsymbol.
// Besides TargetGlobalAddress / TargetExternalSymbol nodes, the code below also
// looks through an NVPTXISD::Wrapper node (Address receives its operand 0) and
// through the nvvm_ptr_gen_to_param intrinsic applied to an NVPTXISD::MoveParam
// node, in which case it recurses on the MoveParam's operand 0.
//   N       - candidate address node.
//   Address - out-parameter written with the matched address.
4956 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4957 // Return true if TGA or ES.
4958 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4959 N.getOpcode() == ISD::TargetExternalSymbol) {
4963 if (N.getOpcode() == NVPTXISD::Wrapper) {
4964 Address = N.getOperand(0);
4967 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
// Operand 0 of an INTRINSIC_WO_CHAIN node is read as the intrinsic ID.
4968 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
4969 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
4970 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
4971 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
// Shared implementation of the symbol+immediate addressing-mode matchers.
// Looks for (add base, constant) where base is accepted by SelectDirectAddr.
//   Addr   - address expression to match.
//   Base   - out: the direct address produced by SelectDirectAddr.
//   Offset - out: target constant of type mvt holding the zero-extended
//            constant operand.
//   mvt    - value type used for the offset constant.
// OpNode is not referenced in the visible body.
4977 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4978 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
4979 if (Addr.getOpcode() == ISD::ADD) {
// Only a constant second operand of the ADD is considered as the offset.
4980 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4981 SDValue base = Addr.getOperand(0);
4982 if (SelectDirectAddr(base, Base)) {
4983 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// Symbol+immediate address matcher with an i32 offset constant; forwards to
// SelectADDRsi_imp with MVT::i32.
4992 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4993 SDValue &Base, SDValue &Offset) {
4994 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// Symbol+immediate address matcher with an i64 offset constant; forwards to
// SelectADDRsi_imp with MVT::i64.
4998 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4999 SDValue &Base, SDValue &Offset) {
5000 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Shared implementation of the register+immediate addressing-mode matchers.
// Cases handled by the visible code:
//   - a bare frame index: Base is the target frame index, Offset is 0;
//   - TargetExternalSymbol / TargetGlobalAddress: rejected (returns false);
//   - (add x, constant): Base is x (converted to a target frame index when x
//     is a frame index) and Offset is the constant.
//   Addr   - address expression to match (passed by value).
//   Base   - out: base operand.
//   Offset - out: target constant of type mvt.
//   mvt    - value type used for the frame index and offset constants.
// OpNode is not referenced in the visible body.
5004 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5005 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5006 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5007 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5008 Offset = CurDAG->getTargetConstant(0, mvt);
5011 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5012 Addr.getOpcode() == ISD::TargetGlobalAddress)
5013 return false; // direct calls.
5015 if (Addr.getOpcode() == ISD::ADD) {
// Addr is used as the out-parameter of SelectDirectAddr here; it is this
// function's by-value copy, so the caller's SDValue is not modified.
// NOTE(review): what happens when the first ADD operand is a direct address is
// not visible here -- presumably the match is rejected; confirm.
5016 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5019 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5020 if (FrameIndexSDNode *FIN =
5021 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5022 // Constant offset from frame ref.
5023 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5025 Base = Addr.getOperand(0);
// The offset is the zero-extended constant operand, typed as mvt.
5026 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
// Register+immediate address matcher using i32 constants; forwards to
// SelectADDRri_imp with MVT::i32.
5034 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5035 SDValue &Base, SDValue &Offset) {
5036 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// Register+immediate address matcher using i64 constants; forwards to
// SelectADDRri_imp with MVT::i64.
5040 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5041 SDValue &Base, SDValue &Offset) {
5042 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// Checks the address space of the IR value attached to a memory node's memory
// operand.
//   N   - node to inspect; the memory operand is only read when N is a
//         MemSDNode.
//   spN - address space number to compare against.
// When the value's type is a pointer type, the result is whether that pointer
// type's address space equals spN.
5045 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5046 unsigned int spN) const {
5047 const Value *Src = nullptr;
5048 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
// A memory operand backed by a pseudo source value is special-cased when spN
// is 0. NOTE(review): the result produced for this case is not visible here --
// confirm.
5049 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5051 Src = mN->getMemOperand()->getValue();
// Only a pointer-typed value carries an address space to compare.
5055 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
5056 return (PT->getAddressSpace() == spN);
5060 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5061 /// inline asm expressions.
5062 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5063 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
5065 switch (ConstraintCode) {
5069 if (SelectDirectAddr(Op, Op0)) {
5070 OutOps.push_back(Op0);
5071 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
5074 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5075 OutOps.push_back(Op0);
5076 OutOps.push_back(Op1);