//===-- VectorElementize.cpp - Convert vector ops to scalar ops ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass converts operations on vector types to operations on their
// element types.
//
// For generic binary and unary vector instructions, the conversion is simple.
// Suppose the vector operation is
//     av = bv Vop cv
// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
// This gets converted to the following :
//     a1 = b1 Sop c1
//     a2 = b2 Sop c2
//
// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
// For the above example, the map will look as follows:
//     av => [a1, a2]
//     bv => [b1, b2]
//     cv => [c1, c2]
//
// In addition, initVectorInfo creates the following opcode->opcode map.
//     Vop => Sop
//     OtherVop => OtherSop
//     ...
//
// For vector specific instructions like vecbuild, vecshuffle etc, the
// conversion is different. Look at comments near the functions with
// prefix createVec<...>.
//
//===----------------------------------------------------------------------===//
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Constant.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Type.h"

using namespace llvm;
namespace {

class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
  virtual bool runOnMachineFunction(MachineFunction &F);

  NVPTXTargetMachine &TM;
  MachineRegisterInfo *MRI;
  const NVPTXRegisterInfo *RegInfo;
  const NVPTXInstrInfo *InstrInfo;

  llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
  RegClassMap;
  llvm::DenseMap<unsigned, bool> SimpleMoveMap;

  llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;

  bool isVectorInstr(MachineInstr *);

  SmallVector<unsigned, 4> getScalarRegisters(unsigned);
  unsigned getScalarVersion(unsigned);
  unsigned getScalarVersion(MachineInstr *);

  bool isVectorRegister(unsigned);
  const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
  unsigned numCopiesNeeded(MachineInstr *);

  void createLoadCopy(MachineFunction&, MachineInstr *,
                      std::vector<MachineInstr *>&);
  void createStoreCopy(MachineFunction&, MachineInstr *,
                       std::vector<MachineInstr *>&);

  void createVecDest(MachineFunction&, MachineInstr *,
                     std::vector<MachineInstr *>&);

  void createCopies(MachineFunction&, MachineInstr *,
                    std::vector<MachineInstr *>&);

  unsigned copyProp(MachineFunction&);
  unsigned removeDeadMoves(MachineFunction&);

  void elementize(MachineFunction&);

  bool isSimpleMove(MachineInstr *);

  void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);

  void createVecExtract(MachineFunction& F, MachineInstr *Instr,
                        std::vector<MachineInstr *>& copies);

  void createVecInsert(MachineFunction& F, MachineInstr *Instr,
                       std::vector<MachineInstr *>& copies);

  void createVecBuild(MachineFunction& F, MachineInstr *Instr,
                      std::vector<MachineInstr *>& copies);

public:

  static char ID; // Pass identification, replacement for typeid
  VectorElementize(NVPTXTargetMachine &tm)
  : MachineFunctionPass(ID), TM(tm) {}

  virtual const char *getPassName() const {
    return "Convert LLVM vector types to their element types";
  }
};
}

char VectorElementize::ID = 1;
static cl::opt<bool>
RemoveRedundantMoves("nvptx-remove-redundant-moves",
       cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
       cl::init(true));
#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
    >> NVPTX::VecInstTypeShift)
#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
#define ISVECLOAD(x)    (VECINST(x) == NVPTX::VecLoad)
#define ISVECSTORE(x)   (VECINST(x) == NVPTX::VecStore)
#define ISVECBUILD(x)   (VECINST(x) == NVPTX::VecBuild)
#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
#define ISVECINSERT(x)  (VECINST(x) == NVPTX::VecInsert)
#define ISVECDEST(x)    (VECINST(x) == NVPTX::VecDest)
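
// These macros decode the VecInstType field that the NVPTX instruction
// definitions (see NVPTXVector.td) encode into each instruction's TSFlags;
// VecNOP marks a non-vector instruction.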
bool VectorElementize::isSimpleMove(MachineInstr *mi) {
  if (mi->isCopy())
    return true;
  unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
      >> NVPTX::SimpleMoveShift;
  return (TSFlags == 1);
}
bool VectorElementize::isVectorInstr(MachineInstr *mi) {
  if ((mi->getOpcode() == NVPTX::PHI) ||
      (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
    MachineOperand dest = mi->getOperand(0);
    return isVectorRegister(dest.getReg());
  }
  return ISVECINST(mi);
}
unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
  return getScalarVersion(mi->getOpcode());
}
///=============================================================================
///Instr is assumed to be a vector instruction. For most vector instructions,
///the size of the destination vector register gives the number of scalar copies
///needed. For VecStore, the size of getOperand(0), the stored vector value,
///gives the number of scalar copies needed. For VecExtract, the dest is a
///scalar, so getOperand(1) gives the number of scalar copies needed.
///=============================================================================
unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
  unsigned numDefs = 0;
  unsigned def = 0;
  for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
    MachineOperand oper = Instr->getOperand(i);

    if (!oper.isReg()) continue;
    if (!oper.isDef()) continue;
    def = i;
    numDefs++;
  }
  assert((numDefs <= 1) && "Only 0 or 1 defs supported");

  if (numDefs == 1) {
    unsigned regnum = Instr->getOperand(def).getReg();
    if (ISVECEXTRACT(Instr))
      regnum = Instr->getOperand(1).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  else if (numDefs == 0) {
    assert(ISVECSTORE(Instr)
           && "Only 0 def instruction supported is vector store");

    unsigned regnum = Instr->getOperand(0).getReg();
    return getNVPTXVectorSize(MRI->getRegClass(regnum));
  }
  return 0;
}
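
// Worked example (illustrative): a v4i32 add defines a V4I32 vreg, so four
// scalar copies are needed; for a v2f32 store, operand 0 is the V2F32 source,
// so two copies are needed.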
const TargetRegisterClass *VectorElementize::
getScalarRegClass(const TargetRegisterClass *RC) {
  assert(isNVPTXVectorRegClass(RC) &&
         "Not a vector register class");
  return getNVPTXElemClass(RC);
}
bool VectorElementize::isVectorRegister(unsigned reg) {
  const TargetRegisterClass *RC = MRI->getRegClass(reg);
  return isNVPTXVectorRegClass(RC);
}
///=============================================================================
///For every vector register 'v' that is not already in the VectorToScalarMap,
///create n scalar registers of the corresponding element type, where n
///is 2 or 4 (getNVPTXVectorSize), and add them to the VectorToScalarMap.
///=============================================================================
SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
  assert(isVectorRegister(regnum) && "Expecting a vector register here");
  // Create the scalar registers and put them in the map, if not already there.
  if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
    const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
    const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);

    SmallVector<unsigned, 4> temp;

    for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
      temp.push_back(MRI->createVirtualRegister(scalarClass));

    VectorToScalarMap[regnum] = temp;
  }
  return VectorToScalarMap[regnum];
}
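
// For example, the first query for a V4I32 vreg 'av' creates four fresh i32
// vregs and caches them, so every later query for 'av' returns the same
// [a1, a2, a3, a4].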
///=============================================================================
///For a vector load of the form
///va <= ldv2 [addr]
///the following multi output instruction is created :
///[v1, v2] <= LD [addr]
///Look at NVPTXVector.td for the definitions of multi output loads.
///=============================================================================
void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy = copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, that should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}
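
// Illustrative result: 'LD_v2i32_avar va, [addr]' becomes
// 'LDV_i32_v2_avar a1, a2, [addr]' (see the opcode map in getScalarVersion),
// where a1/a2 come from getScalarRegisters(va).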
///=============================================================================
///For a vector store of the form
///stv2 va, [addr]
///the following multi input instruction is created :
///stv2 v1, v2, [addr]
///Look at NVPTXVector.td for the definitions of multi input stores.
///=============================================================================
void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy = copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the source, that should be a vector operand.
  MachineOperand src = copy->getOperand(0);
  unsigned regnum = src.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}
///=============================================================================
///va <= shufflev2 vb, vc, <i1>, <i2>
///gets converted to 2 moves into a1 and a2. The sources of the moves depend on
///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
///shufflev4 the set is {0,..,7}. For example, if i1=3, i2=0, the move
///instructions will be
///a1 <= c2
///a2 <= b1
///=============================================================================
void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned src1regnum = Instr->getOperand(1).getReg();
  unsigned src2regnum = Instr->getOperand(2).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
  SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                                 InstrInfo->get(getScalarVersion(Instr)), dest[i]);
    MachineOperand which = Instr->getOperand(3+i);
    assert(which.isImm() && "Shuffle operand not a constant");

    int src = which.getImm();
    int elem = src % numcopies;

    if (which.getImm() < numcopies)
      copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
    else
      copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
    copies.push_back(copy);
  }
}
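
// Index decoding: indices in [0, numcopies) select from src1, the rest from
// src2, with elem = src % numcopies. E.g. for shufflev2, index 3 maps to
// src2[1] (c2), matching the 'a1 <= c2' example above.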
///=============================================================================
///a <= extractv2 va, <i1>
///gets turned into a simple move to the scalar register a. The source depends
///on i1.
///=============================================================================
void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
                                        std::vector<MachineInstr *>& copies) {
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which = Instr->getOperand(2);
  assert(which.isImm() && "Extract operand not a constant");

  DebugLoc DL = Instr->getDebugLoc();

  MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
                               Instr->getOperand(0).getReg());
  copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));

  copies.push_back(copy);
}
///=============================================================================
///va <= vecinsertv2 vb, c, <i1>
///This instruction copies all elements of vb to va, except the 'i1'th element.
///The scalar value c becomes the 'i1'th element of va.
///This gets translated to 2 (4 for vecinsertv4) moves.
///=============================================================================
void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
                                       std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();
  unsigned srcregnum = Instr->getOperand(1).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
  SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);

  MachineOperand which = Instr->getOperand(3);
  assert(which.isImm() && "Insert operand not a constant");
  unsigned int elem = which.getImm();

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                                 InstrInfo->get(getScalarVersion(Instr)), dest[i]);

    if (i != elem)
      copy->addOperand(MachineOperand::CreateReg(src[i], false));
    else
      copy->addOperand(Instr->getOperand(2));

    copies.push_back(copy);
  }
}
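
// For 'va <= vecinsertv2 vb, c, <1>' this emits 'a1 <= b1' (i != elem) and
// 'a2 <= c' (i == elem), so only the selected lane takes the scalar value.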
///=============================================================================
///va <= buildv2 b1, b2
///gets translated to
///a1 <= b1
///a2 <= b2
///=============================================================================
void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
                                      std::vector<MachineInstr *>& copies) {
  unsigned numcopies = numCopiesNeeded(Instr);

  unsigned destregnum = Instr->getOperand(0).getReg();

  SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);

  DebugLoc DL = Instr->getDebugLoc();

  for (unsigned i=0; i<numcopies; i++) {
    MachineInstr *copy = BuildMI(F, DL,
                                 InstrInfo->get(getScalarVersion(Instr)), dest[i]);

    copy->addOperand(Instr->getOperand(1+i));

    copies.push_back(copy);
  }
}
///=============================================================================
///For a tex inst of the form
///va <= op [scalar operands]
///the following multi output instruction is created :
///[v1, v2] <= op' [scalar operands]
///=============================================================================
void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
                                     std::vector<MachineInstr *>& copies) {
  copies.push_back(F.CloneMachineInstr(Instr));

  MachineInstr *copy = copies[0];
  copy->setDesc(InstrInfo->get(getScalarVersion(copy)));

  // Remove the dest, that should be a vector operand.
  MachineOperand dest = copy->getOperand(0);
  unsigned regnum = dest.getReg();

  SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
  copy->RemoveOperand(0);

  std::vector<MachineOperand> otherOperands;
  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    otherOperands.push_back(copy->getOperand(i));

  for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
    copy->RemoveOperand(0);

  for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
    copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));

  for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
    copy->addOperand(otherOperands[i]);
}
///=============================================================================
///Look at the vector instruction type and dispatch to the createVec<...>
///function that creates the scalar copies.
///=============================================================================
void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
                                    std::vector<MachineInstr *>& copies) {
  if (ISVECLOAD(Instr)) {
    createLoadCopy(F, Instr, copies);
    return;
  }
  if (ISVECSTORE(Instr)) {
    createStoreCopy(F, Instr, copies);
    return;
  }
  if (ISVECSHUFFLE(Instr)) {
    createVecShuffle(F, Instr, copies);
    return;
  }
  if (ISVECEXTRACT(Instr)) {
    createVecExtract(F, Instr, copies);
    return;
  }
  if (ISVECINSERT(Instr)) {
    createVecInsert(F, Instr, copies);
    return;
  }
  if (ISVECDEST(Instr)) {
    createVecDest(F, Instr, copies);
    return;
  }
  if (ISVECBUILD(Instr)) {
    createVecBuild(F, Instr, copies);
    return;
  }

  unsigned numcopies = numCopiesNeeded(Instr);

  for (unsigned i=0; i<numcopies; ++i)
    copies.push_back(F.CloneMachineInstr(Instr));

  for (unsigned i=0; i<numcopies; ++i) {
    MachineInstr *copy = copies[i];

    std::vector<MachineOperand> allOperands;
    std::vector<bool> isDef;

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
      MachineOperand oper = copy->getOperand(j);
      allOperands.push_back(oper);
      if (oper.isReg())
        isDef.push_back(oper.isDef());
      else
        isDef.push_back(false);
    }

    for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
      copy->RemoveOperand(0);

    copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));

    for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
      MachineOperand oper = allOperands[j];
      if (oper.isReg()) {
        unsigned regnum = oper.getReg();
        if (isVectorRegister(regnum)) {
          SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
          copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
        }
        else
          copy->addOperand(oper);
      }
      else
        copy->addOperand(oper);
    }
  }
}
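
// Generic example: 'VAddV2I32 av, bv, cv' is cloned twice and rewritten to
// 'ADDi32rr a1, b1, c1' and 'ADDi32rr a2, b2, c2' (see the opcode map in
// getScalarVersion below); non-register operands are copied through as-is.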
///=============================================================================
///Scan through all basic blocks, looking for vector instructions.
///For each vector instruction I, insert the scalar copies before I, and
///add I into the toRemove vector. Finally remove all instructions in toRemove.
///=============================================================================
void VectorElementize::elementize(MachineFunction &F) {
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
       BI!=BE; ++BI) {
    MachineBasicBlock *BB = &*BI;

    std::vector<MachineInstr *> copies;
    std::vector<MachineInstr *> toRemove;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
         II!=IE; ++II) {
      MachineInstr *Instr = &*II;

      if (!isVectorInstr(Instr))
        continue;

      copies.clear();
      createCopies(F, Instr, copies);
      for (unsigned i=0, e=copies.size(); i!=e; ++i)
        BB->insert(II, copies[i]);

      assert((copies.size() > 0) && "Problem in createCopies");
      toRemove.push_back(Instr);
    }
    for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
      F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
  }
}
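
// Note on iterator safety: the scalar copies are inserted before II, and the
// original vector instruction is only queued in toRemove, so the basic-block
// iteration never visits a freshly inserted copy or a deleted instruction.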
///=============================================================================
///Copy propagation. If an instruction uses a register that is defined by a
///simple vreg-to-vreg move
///a <= b
///then the use of a is replaced by a use of b, e.g.
///c <= Op a, d   becomes   c <= Op b, d
///The original move is still present. This works on SSA form machine code.
///Note that a <= b should be a simple vreg-to-vreg move instruction.
///TBD : I didn't find a function that can do replaceOperand, so I remove
///all operands and add all of them again, replacing the one while adding.
///=============================================================================
unsigned VectorElementize::copyProp(MachineFunction &F) {
  unsigned numReplacements = 0;

  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
       ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
         ++II) {
      MachineInstr *Instr = &*II;

      // Don't do copy propagation on PHI as it will cause unnecessary
      // live range overlap.
      if ((Instr->getOpcode() == TargetOpcode::PHI) ||
          (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
        continue;

      bool needsReplacement = false;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        if (!oper.isReg()) continue;
        if (oper.isDef()) continue;
        if (!RegInfo->isVirtualRegister(oper.getReg())) continue;

        MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());

        if (!defInstr) continue;

        if (!isSimpleMove(defInstr)) continue;

        MachineOperand defSrc = defInstr->getOperand(1);
        if (!defSrc.isReg()) continue;
        if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;

        needsReplacement = true;
        break;
      }
      if (!needsReplacement) continue;

      ++numReplacements;

      std::vector<MachineOperand> operands;

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
        MachineOperand oper = Instr->getOperand(i);
        bool flag = false;
        do {
          if (!(oper.isReg()))
            break;
          if (oper.isDef())
            break;
          if (!(RegInfo->isVirtualRegister(oper.getReg())))
            break;
          MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
          if (!(isSimpleMove(defInstr)))
            break;
          MachineOperand defSrc = defInstr->getOperand(1);
          if (!(defSrc.isReg()))
            break;
          if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
            break;
          operands.push_back(defSrc);
          flag = true;
        } while (0);
        if (flag == false)
          operands.push_back(oper);
      }

      for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
        Instr->RemoveOperand(0);
      for (unsigned i=0, e=operands.size(); i!=e; ++i)
        Instr->addOperand(operands[i]);
    }
  }
  return numReplacements;
}
///=============================================================================
///Look for simple vreg-to-vreg moves whose use_empty() is true, and add
///them to the deadMoves vector. Then remove all instructions in deadMoves.
///=============================================================================
unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
  std::vector<MachineInstr *> deadMoves;
  for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
       ++BI) {
    MachineBasicBlock *BB = &*BI;

    for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
         ++II) {
      MachineInstr *Instr = &*II;

      if (!isSimpleMove(Instr)) continue;

      MachineOperand dest = Instr->getOperand(0);
      assert(dest.isReg() && "dest of move not a register");
      assert(RegInfo->isVirtualRegister(dest.getReg()) &&
             "dest of move not a virtual register");

      if (MRI->use_empty(dest.getReg())) {
        deadMoves.push_back(Instr);
      }
    }
  }

  for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
    F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));

  return deadMoves.size();
}
///=============================================================================
///Main function for this pass.
///=============================================================================
bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
  MRI = &F.getRegInfo();

  RegInfo = TM.getRegisterInfo();
  InstrInfo = TM.getInstrInfo();

  VectorToScalarMap.clear();

  elementize(F);

  if (RemoveRedundantMoves)
    while (1) {
      if (copyProp(F) == 0) break;
      removeDeadMoves(F);
    }

  return true;
}
FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
  return new VectorElementize(tm);
}
unsigned VectorElementize::getScalarVersion(unsigned opcode) {
  if (opcode == NVPTX::PHI)
    return opcode;
  if (opcode == NVPTX::IMPLICIT_DEF)
    return opcode;
  switch (opcode) {
  default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
  case TargetOpcode::COPY: return TargetOpcode::COPY;
  case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
  case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
  case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
  case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
  case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
  case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
  case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
  case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
  case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
  case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
  case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
  case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
  case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
  case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
  case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
  case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
  case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
  case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
  case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
  case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
  case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
  case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
  case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
  case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
  case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
  case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
  case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
  case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
  case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
  case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
  case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
  case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
  case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
  case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
  case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
  case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
  case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
  case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
  case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
  case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
  case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
  case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
  case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
  case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
  case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
  case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
  case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
  case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
  case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
  case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
  case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
  case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
  case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
  case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
  case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
  case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
  case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
  case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
  case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
  case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
  case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
  case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
  case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
  case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
  case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
  case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
  case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
  case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
  case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
  case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
  case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
  case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
  case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
  case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
  case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
  case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
  case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
  case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
  case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
  case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
  case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
  case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
  case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
  case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
  case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
  case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
  case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
  case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
  case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
  case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
  case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
  case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
  case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
  case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
  case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
  case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
  case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
  case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
  case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
  case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
  case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
  case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
  case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
  case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
  case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
  case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
  case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
  case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
  case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
  case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
  case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
  case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
  case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
  case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
  case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
  case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
  case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
  case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
  case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
  case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
  case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
  case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
  case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
  case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
  case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
  case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
  case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
  case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
  case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
  case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
  case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
  case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
  case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
  case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
  case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
  case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
  case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
  case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
  case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
  case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
  case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
  case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
  case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
  case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
  case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
  case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
  case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
  case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
  case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
  case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
  case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
  case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
  case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
  case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
  case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
  case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
  case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
  case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
  case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
  case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
  case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
  case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
  case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
  case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
  case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
  case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
  case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
  case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
  case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
  case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
  case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
  case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
  case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
  case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
  case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
  case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
  case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
  case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
  case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
  case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
  case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
  case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
  case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
  case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
  case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
  case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
  case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
  case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
  case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
  case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
  case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
  case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
  case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
  case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
  case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
  case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
  case NVPTX::VNegV2I16: return NVPTX::INEG16;
  case NVPTX::VNegV2I32: return NVPTX::INEG32;
  case NVPTX::VNegV2I64: return NVPTX::INEG64;
  case NVPTX::VNegV2I8: return NVPTX::INEG8;
  case NVPTX::VNegV4I16: return NVPTX::INEG16;
  case NVPTX::VNegV4I32: return NVPTX::INEG32;
  case NVPTX::VNegV4I8: return NVPTX::INEG8;
  case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
  case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
  case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
  case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
  case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
  case NVPTX::VNotV2I16: return NVPTX::NOT16;
  case NVPTX::VNotV2I32: return NVPTX::NOT32;
  case NVPTX::VNotV2I64: return NVPTX::NOT64;
  case NVPTX::VNotV2I8: return NVPTX::NOT8;
  case NVPTX::VNotV4I16: return NVPTX::NOT16;
  case NVPTX::VNotV4I32: return NVPTX::NOT32;
  case NVPTX::VNotV4I8: return NVPTX::NOT8;
  case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
  case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
  case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
  case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
  case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
  case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
  case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
  case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
  case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
  case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
  case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
  case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
  case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
  case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
  case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
  case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
  case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
  case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
  case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
  case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
  case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
  case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
  case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
  case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
  case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
  case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
  case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
  case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
  case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
  case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
  case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
  case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
  case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
  case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
  case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
  case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
  case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
  case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
  case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
  case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
  case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
  case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
  case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
  case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
  case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
  case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
  case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
  case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
  case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
  case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
  case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
  case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
  case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
  case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
  case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
  case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
  case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
  case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
  case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
  case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
  case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
  case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
  case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
  case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
  case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
  case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
  case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
  case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
  case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
  case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
  case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
  case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
  case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
  case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
  case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
  case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
  case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
  case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
  case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
  case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
  case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
  case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
  case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
  case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
  case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
  case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
  case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
  case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
  case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
  case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
  case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
  case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
  case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
  case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
  case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
  case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
  case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
  case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
  case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
  case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
  case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
  case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
  case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
  case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
  case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
  case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
  case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
  case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
  case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
  case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
  case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
  case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
  case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
  case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
  case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
  case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
  case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
  case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
  case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
  case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
  case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
  case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
  case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
  case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
  case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
  case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
  case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
  case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
  case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
  case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
  case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
  case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
  case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
  case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
  case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
  case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
  case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
  case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
  case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
  case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
  case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
  case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
  case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
  case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
  case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
  case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
  case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
  case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
  case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
  case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
  case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;

  case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
  case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
  case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
  case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
  case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
  case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
  case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
  case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
  case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
  case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
  case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
  case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
  case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
  case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
  case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
  case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
  case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
  case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
  case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
  case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
  case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
  case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
  case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
  case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
  case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
  case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
  case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
  case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
  case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
  case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
  case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
  case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
  case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
  case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
  case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
  case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;

  case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
  case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
  case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
  case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
  case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
  case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
  case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
  case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;

  case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
  case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
  case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
  case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
  case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
  case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
  case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
  case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;

  case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
  case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
  case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
  case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
  case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
  case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
  case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
  case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;

  case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
  case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
  case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
  case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
  case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
  case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
  case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
  case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;

  case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
  case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
  case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
  case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
  case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
  case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
  case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
  case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;

  case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
  case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
  case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
  case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
  case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
  case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
  case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
  case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;

  case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
  case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
  case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
  case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
  case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
  case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
  case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
  case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;

  case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
  case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
  case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
  case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
  case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
  case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
  case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
  case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;

  case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
  case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
  case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
  case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
  case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
  case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
  case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
  case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;

  case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
  case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
  case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
  case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
  case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
  case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
  case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
  case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
  }
  // Not reached; the default case asserts on any unmapped vector opcode.
  return 0;
}