1 //===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// On Fermi, image handles are not supported. To work around this, we traverse
// the machine code and replace image handles with concrete symbols. For this
// to work reliably, all function calls must have been inlined.
14 //===----------------------------------------------------------------------===//
17 #include "NVPTXMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/ADT/DenseSet.h"
27 class NVPTXReplaceImageHandles : public MachineFunctionPass {
30 DenseSet<MachineInstr *> InstrsToRemove;
33 NVPTXReplaceImageHandles();
35 bool runOnMachineFunction(MachineFunction &MF) override;
37 bool processInstr(MachineInstr &MI);
38 void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
42 char NVPTXReplaceImageHandles::ID = 0;
44 NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
45 : MachineFunctionPass(ID) {}
47 bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
49 InstrsToRemove.clear();
51 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
53 for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
55 MachineInstr &MI = *I;
56 Changed |= processInstr(MI);
60 // Now clean up any handle-access instructions
61 // This is needed in debug mode when code cleanup passes are not executed,
62 // but we need the handle access to be eliminated because they are not
63 // valid instructions when image handles are disabled.
64 for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
65 E = InstrsToRemove.end(); I != E; ++I) {
66 (*I)->eraseFromParent();
72 bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
73 MachineFunction &MF = *MI.getParent()->getParent();
74 // Check if we have a surface/texture instruction
75 switch (MI.getOpcode()) {
76 default: return false;
77 case NVPTX::TEX_1D_F32_I32:
78 case NVPTX::TEX_1D_F32_F32:
79 case NVPTX::TEX_1D_F32_F32_LEVEL:
80 case NVPTX::TEX_1D_F32_F32_GRAD:
81 case NVPTX::TEX_1D_I32_I32:
82 case NVPTX::TEX_1D_I32_F32:
83 case NVPTX::TEX_1D_I32_F32_LEVEL:
84 case NVPTX::TEX_1D_I32_F32_GRAD:
85 case NVPTX::TEX_1D_ARRAY_F32_I32:
86 case NVPTX::TEX_1D_ARRAY_F32_F32:
87 case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
88 case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
89 case NVPTX::TEX_1D_ARRAY_I32_I32:
90 case NVPTX::TEX_1D_ARRAY_I32_F32:
91 case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
92 case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
93 case NVPTX::TEX_2D_F32_I32:
94 case NVPTX::TEX_2D_F32_F32:
95 case NVPTX::TEX_2D_F32_F32_LEVEL:
96 case NVPTX::TEX_2D_F32_F32_GRAD:
97 case NVPTX::TEX_2D_I32_I32:
98 case NVPTX::TEX_2D_I32_F32:
99 case NVPTX::TEX_2D_I32_F32_LEVEL:
100 case NVPTX::TEX_2D_I32_F32_GRAD:
101 case NVPTX::TEX_2D_ARRAY_F32_I32:
102 case NVPTX::TEX_2D_ARRAY_F32_F32:
103 case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
104 case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
105 case NVPTX::TEX_2D_ARRAY_I32_I32:
106 case NVPTX::TEX_2D_ARRAY_I32_F32:
107 case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
108 case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
109 case NVPTX::TEX_3D_F32_I32:
110 case NVPTX::TEX_3D_F32_F32:
111 case NVPTX::TEX_3D_F32_F32_LEVEL:
112 case NVPTX::TEX_3D_F32_F32_GRAD:
113 case NVPTX::TEX_3D_I32_I32:
114 case NVPTX::TEX_3D_I32_F32:
115 case NVPTX::TEX_3D_I32_F32_LEVEL:
116 case NVPTX::TEX_3D_I32_F32_GRAD: {
117 // This is a texture fetch, so operand 4 is a texref and operand 5 is
119 MachineOperand &TexHandle = MI.getOperand(4);
120 MachineOperand &SampHandle = MI.getOperand(5);
122 replaceImageHandle(TexHandle, MF);
123 replaceImageHandle(SampHandle, MF);
127 case NVPTX::SULD_1D_I8_TRAP:
128 case NVPTX::SULD_1D_I16_TRAP:
129 case NVPTX::SULD_1D_I32_TRAP:
130 case NVPTX::SULD_1D_ARRAY_I8_TRAP:
131 case NVPTX::SULD_1D_ARRAY_I16_TRAP:
132 case NVPTX::SULD_1D_ARRAY_I32_TRAP:
133 case NVPTX::SULD_2D_I8_TRAP:
134 case NVPTX::SULD_2D_I16_TRAP:
135 case NVPTX::SULD_2D_I32_TRAP:
136 case NVPTX::SULD_2D_ARRAY_I8_TRAP:
137 case NVPTX::SULD_2D_ARRAY_I16_TRAP:
138 case NVPTX::SULD_2D_ARRAY_I32_TRAP:
139 case NVPTX::SULD_3D_I8_TRAP:
140 case NVPTX::SULD_3D_I16_TRAP:
141 case NVPTX::SULD_3D_I32_TRAP: {
142 // This is a V1 surface load, so operand 1 is a surfref
143 MachineOperand &SurfHandle = MI.getOperand(1);
145 replaceImageHandle(SurfHandle, MF);
149 case NVPTX::SULD_1D_V2I8_TRAP:
150 case NVPTX::SULD_1D_V2I16_TRAP:
151 case NVPTX::SULD_1D_V2I32_TRAP:
152 case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
153 case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
154 case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
155 case NVPTX::SULD_2D_V2I8_TRAP:
156 case NVPTX::SULD_2D_V2I16_TRAP:
157 case NVPTX::SULD_2D_V2I32_TRAP:
158 case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
159 case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
160 case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
161 case NVPTX::SULD_3D_V2I8_TRAP:
162 case NVPTX::SULD_3D_V2I16_TRAP:
163 case NVPTX::SULD_3D_V2I32_TRAP: {
164 // This is a V2 surface load, so operand 2 is a surfref
165 MachineOperand &SurfHandle = MI.getOperand(2);
167 replaceImageHandle(SurfHandle, MF);
171 case NVPTX::SULD_1D_V4I8_TRAP:
172 case NVPTX::SULD_1D_V4I16_TRAP:
173 case NVPTX::SULD_1D_V4I32_TRAP:
174 case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
175 case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
176 case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
177 case NVPTX::SULD_2D_V4I8_TRAP:
178 case NVPTX::SULD_2D_V4I16_TRAP:
179 case NVPTX::SULD_2D_V4I32_TRAP:
180 case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
181 case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
182 case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
183 case NVPTX::SULD_3D_V4I8_TRAP:
184 case NVPTX::SULD_3D_V4I16_TRAP:
185 case NVPTX::SULD_3D_V4I32_TRAP: {
186 // This is a V4 surface load, so operand 4 is a surfref
187 MachineOperand &SurfHandle = MI.getOperand(4);
189 replaceImageHandle(SurfHandle, MF);
193 case NVPTX::SUST_B_1D_B8_TRAP:
194 case NVPTX::SUST_B_1D_B16_TRAP:
195 case NVPTX::SUST_B_1D_B32_TRAP:
196 case NVPTX::SUST_B_1D_V2B8_TRAP:
197 case NVPTX::SUST_B_1D_V2B16_TRAP:
198 case NVPTX::SUST_B_1D_V2B32_TRAP:
199 case NVPTX::SUST_B_1D_V4B8_TRAP:
200 case NVPTX::SUST_B_1D_V4B16_TRAP:
201 case NVPTX::SUST_B_1D_V4B32_TRAP:
202 case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
203 case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
204 case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
205 case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
206 case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
207 case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
208 case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
209 case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
210 case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
211 case NVPTX::SUST_B_2D_B8_TRAP:
212 case NVPTX::SUST_B_2D_B16_TRAP:
213 case NVPTX::SUST_B_2D_B32_TRAP:
214 case NVPTX::SUST_B_2D_V2B8_TRAP:
215 case NVPTX::SUST_B_2D_V2B16_TRAP:
216 case NVPTX::SUST_B_2D_V2B32_TRAP:
217 case NVPTX::SUST_B_2D_V4B8_TRAP:
218 case NVPTX::SUST_B_2D_V4B16_TRAP:
219 case NVPTX::SUST_B_2D_V4B32_TRAP:
220 case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
221 case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
222 case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
223 case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
224 case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
225 case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
226 case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
227 case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
228 case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
229 case NVPTX::SUST_B_3D_B8_TRAP:
230 case NVPTX::SUST_B_3D_B16_TRAP:
231 case NVPTX::SUST_B_3D_B32_TRAP:
232 case NVPTX::SUST_B_3D_V2B8_TRAP:
233 case NVPTX::SUST_B_3D_V2B16_TRAP:
234 case NVPTX::SUST_B_3D_V2B32_TRAP:
235 case NVPTX::SUST_B_3D_V4B8_TRAP:
236 case NVPTX::SUST_B_3D_V4B16_TRAP:
237 case NVPTX::SUST_B_3D_V4B32_TRAP:
238 case NVPTX::SUST_P_1D_B8_TRAP:
239 case NVPTX::SUST_P_1D_B16_TRAP:
240 case NVPTX::SUST_P_1D_B32_TRAP:
241 case NVPTX::SUST_P_1D_V2B8_TRAP:
242 case NVPTX::SUST_P_1D_V2B16_TRAP:
243 case NVPTX::SUST_P_1D_V2B32_TRAP:
244 case NVPTX::SUST_P_1D_V4B8_TRAP:
245 case NVPTX::SUST_P_1D_V4B16_TRAP:
246 case NVPTX::SUST_P_1D_V4B32_TRAP:
247 case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
248 case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
249 case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
250 case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
251 case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
252 case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
253 case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
254 case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
255 case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
256 case NVPTX::SUST_P_2D_B8_TRAP:
257 case NVPTX::SUST_P_2D_B16_TRAP:
258 case NVPTX::SUST_P_2D_B32_TRAP:
259 case NVPTX::SUST_P_2D_V2B8_TRAP:
260 case NVPTX::SUST_P_2D_V2B16_TRAP:
261 case NVPTX::SUST_P_2D_V2B32_TRAP:
262 case NVPTX::SUST_P_2D_V4B8_TRAP:
263 case NVPTX::SUST_P_2D_V4B16_TRAP:
264 case NVPTX::SUST_P_2D_V4B32_TRAP:
265 case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
266 case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
267 case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
268 case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
269 case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
270 case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
271 case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
272 case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
273 case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
274 case NVPTX::SUST_P_3D_B8_TRAP:
275 case NVPTX::SUST_P_3D_B16_TRAP:
276 case NVPTX::SUST_P_3D_B32_TRAP:
277 case NVPTX::SUST_P_3D_V2B8_TRAP:
278 case NVPTX::SUST_P_3D_V2B16_TRAP:
279 case NVPTX::SUST_P_3D_V2B32_TRAP:
280 case NVPTX::SUST_P_3D_V4B8_TRAP:
281 case NVPTX::SUST_P_3D_V4B16_TRAP:
282 case NVPTX::SUST_P_3D_V4B32_TRAP: {
283 // This is a surface store, so operand 0 is a surfref
284 MachineOperand &SurfHandle = MI.getOperand(0);
286 replaceImageHandle(SurfHandle, MF);
290 case NVPTX::TXQ_CHANNEL_ORDER:
291 case NVPTX::TXQ_CHANNEL_DATA_TYPE:
292 case NVPTX::TXQ_WIDTH:
293 case NVPTX::TXQ_HEIGHT:
294 case NVPTX::TXQ_DEPTH:
295 case NVPTX::TXQ_ARRAY_SIZE:
296 case NVPTX::TXQ_NUM_SAMPLES:
297 case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
298 case NVPTX::SUQ_CHANNEL_ORDER:
299 case NVPTX::SUQ_CHANNEL_DATA_TYPE:
300 case NVPTX::SUQ_WIDTH:
301 case NVPTX::SUQ_HEIGHT:
302 case NVPTX::SUQ_DEPTH:
303 case NVPTX::SUQ_ARRAY_SIZE: {
304 // This is a query, so operand 1 is a surfref/texref
305 MachineOperand &Handle = MI.getOperand(1);
307 replaceImageHandle(Handle, MF);
314 void NVPTXReplaceImageHandles::
315 replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
316 const MachineRegisterInfo &MRI = MF.getRegInfo();
317 NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
318 // Which instruction defines the handle?
319 MachineInstr *MI = MRI.getVRegDef(Op.getReg());
320 assert(MI && "No def for image handle vreg?");
321 MachineInstr &TexHandleDef = *MI;
323 switch (TexHandleDef.getOpcode()) {
324 case NVPTX::LD_i64_avar: {
325 // The handle is a parameter value being loaded, replace with the
327 assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
328 StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
329 std::string ParamBaseName = MF.getName();
330 ParamBaseName += "_param_";
331 assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
332 unsigned Param = atoi(Sym.data()+ParamBaseName.size());
334 raw_string_ostream NewSymStr(NewSym);
335 NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
336 Op.ChangeToImmediate(
337 MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
338 InstrsToRemove.insert(&TexHandleDef);
341 case NVPTX::texsurf_handles: {
342 // The handle is a global variable, replace with the global variable name
343 assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
344 const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
345 assert(GV->hasName() && "Global sampler must be named!");
346 Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
347 InstrsToRemove.insert(&TexHandleDef);
351 llvm_unreachable("Unknown instruction operating on handle");
355 MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
356 return new NVPTXReplaceImageHandles();