1 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements loading and parsing a bytecode file and parsing a
11 // bytecode module from a given buffer.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Bytecode/Analyzer.h"
16 #include "llvm/Bytecode/Reader.h"
18 #include "llvm/Module.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/Support/FileUtilities.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/Config/unistd.h"
26 //===----------------------------------------------------------------------===//
27 // BytecodeFileReader - Read from an mmap'able file descriptor.
31 /// BytecodeFileReader - parses a bytecode file from a file
// Reader that loads bytecode from a named file. Only declarations appear
// here; the constructor and destructor are defined out of line.
// NOTE(review): access specifiers and any further members (the out-of-line
// code also uses a `Length` member) are not visible in this listing.
33 class BytecodeFileReader : public BytecodeReader {
// Start of the file image: assigned from ReadFileIntoAddressSpace() in the
// constructor and handed to UnmapFileFromAddressSpace() in the destructor.
35 unsigned char *Buffer;
// Copying is disabled: both operations are declared but never defined.
38 BytecodeFileReader(const BytecodeFileReader&); // Do not implement
39 void operator=(const BytecodeFileReader &BFR); // Do not implement
// Filename names the file to load. H is an optional handler that defaults to
// null; presumably it is forwarded to BytecodeReader -- TODO confirm.
42 BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
43 ~BytecodeFileReader();
// ErrnoMessage - Build an error string of the form
// "<strerror(savedErrNum)>, while trying to <descr>".
//   savedErrNum: the errno value to describe.
//   descr:       description of the operation that failed.
// Returns the combined message as a std::string.
47 static std::string ErrnoMessage (int savedErrNum, std::string descr) {
48 return ::strerror(savedErrNum) + std::string(", while trying to ") + descr;
// Constructor: loads the named file into memory and parses it as bytecode.
// NOTE(review): the initializer list, the condition guarding the throw, and
// the error handling around ParseBytecode are not visible in this listing --
// confirm against the full file.
51 BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
52 llvm::BytecodeHandler* H )
// Load the file. Length is handed to the helper (presumably filled in with
// the file size -- TODO confirm) and reused below as the buffer size.
55 Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
// The thrown object is a std::string (string literal + std::string).
57 throw "Error reading file '" + Filename + "'.";
60 // Parse the bytecode we mmapped in
// Filename is passed as the third argument of ParseBytecode.
61 ParseBytecode(Buffer, Length, Filename);
// Releases the file image. Presumably this runs only on the parse-failure
// path, since the destructor makes the same call -- TODO confirm.
63 UnmapFileFromAddressSpace(Buffer, Length);
// Destructor: releases the file image obtained in the constructor, using the
// same Buffer/Length pair that was passed to ParseBytecode.
68 BytecodeFileReader::~BytecodeFileReader() {
69 // Unmap the bytecode...
70 UnmapFileFromAddressSpace(Buffer, Length);
73 //===----------------------------------------------------------------------===//
74 // BytecodeBufferReader - Read from a memory buffer
78 /// BytecodeBufferReader - parses a bytecode file from a buffer
// Reader that parses bytecode already present in memory. Only declarations
// appear here; the constructor and destructor are defined out of line.
// NOTE(review): access specifiers and any further members (the out-of-line
// code also uses `MustDelete`) are not visible in this listing.
80 class BytecodeBufferReader : public BytecodeReader {
// Either the caller's buffer or a private copy allocated by the constructor;
// the destructor deletes it only when MustDelete is set.
82 const unsigned char *Buffer;
// Copying is disabled: both operations are declared but never defined.
85 BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
86 void operator=(const BytecodeBufferReader &BFR); // Do not implement
// Buf/Length describe the bytes to parse, ModuleID is passed on to
// ParseBytecode, and Handler is optional (defaults to null).
89 BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
90 const std::string &ModuleID,
91 llvm::BytecodeHandler* Handler = 0);
92 ~BytecodeBufferReader();
// Constructor: parses bytecode from a caller-supplied memory buffer, copying
// it first if the buffer does not start on a 4-byte boundary.
// NOTE(review): the Length parameter line, the initializer list, the else
// branch header, the assignments to MustDelete and the error handling around
// ParseBytecode are not visible in this listing -- confirm against the full
// file.
97 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf,
99 const std::string &ModuleID,
100 llvm::BytecodeHandler* H )
103 // If not aligned, allocate a new buffer to hold the bytecode...
104 const unsigned char *ParseBegin = 0;
// A non-zero value in the low two address bits means Buf is not 4-byte
// aligned.
105 if (reinterpret_cast<uint64_t>(Buf) & 3) {
// Four spare bytes leave room to shift the start of the copy forward.
106 Buffer = new unsigned char[Length+4];
// Offset is in the range 1..4, so Offset + Length never exceeds Length+4.
107 unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned
108 ParseBegin = Buffer + Offset;
// The cast drops const only so the freshly allocated copy can be written.
109 memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over
112 // If we don't need to copy it over, just use the caller's copy
113 ParseBegin = Buffer = Buf;
// ModuleID is passed as the third argument of ParseBytecode.
117 ParseBytecode(ParseBegin, Length, ModuleID);
// Frees the private copy, if any. Presumably this runs only on the
// parse-failure path, since the destructor has the same statement -- TODO
// confirm.
119 if (MustDelete) delete [] Buffer;
// Destructor: frees Buffer only when MustDelete is set, i.e. presumably when
// the constructor allocated a private aligned copy rather than borrowing the
// caller's buffer -- TODO confirm where MustDelete is assigned.
124 BytecodeBufferReader::~BytecodeBufferReader() {
125 if (MustDelete) delete [] Buffer;
128 //===----------------------------------------------------------------------===//
129 // BytecodeStdinReader - Read bytecode from Standard Input
133 /// BytecodeStdinReader - parses a bytecode file from stdin
// Reader that parses bytecode read from standard input. Only declarations
// appear here; the constructor is defined out of line.
// NOTE(review): access specifiers are not visible in this listing.
135 class BytecodeStdinReader : public BytecodeReader {
// Accumulates everything read from stdin.
137 std::vector<unsigned char> FileData;
// Points at the first byte of FileData once reading has finished.
138 unsigned char *FileBuf;
// Copying is disabled: both operations are declared but never defined.
140 BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
141 void operator=(const BytecodeStdinReader &BFR); // Do not implement
// H is an optional handler and defaults to null.
144 BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
// Constructor: reads all of standard input into FileData, then parses it.
// Throws a std::string if a read fails or if no data was read.
// NOTE(review): the initializer list, the declaration of BlockSize and the
// condition guarding the first throw are not visible in this listing --
// confirm against the full file.
148 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H )
// Stack buffer for one chunk of input (4096*4 = 16384 bytes).
152 unsigned char Buffer[4096*4];
154 // Read in all of the data from stdin, we cannot mmap stdin...
// The loop ends when ::read returns 0.
155 while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) {
// Presumably guarded by a check for a failed read -- TODO confirm.
157 throw ErrnoMessage(errno, "read from standard input");
// Append the bytes just read to the accumulated data.
159 FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
// Checked before taking the address of the first element below.
162 if (FileData.empty())
163 throw std::string("Standard Input empty!");
165 FileBuf = &FileData[0];
// "<stdin>" is passed as the third argument of ParseBytecode.
166 ParseBytecode(FileBuf, FileData.size(), "<stdin>");
169 //===----------------------------------------------------------------------===//
170 // Varargs transmogrification code...
173 // CheckVarargs - This is used to automatically translate old-style varargs to
174 // new style varargs for backwards compatibility.
// Takes the provider for a freshly loaded module and returns the same
// provider immediately when the module has no llvm.va_start or when
// llvm.va_start takes no parameters. Otherwise the module is fully
// materialized and calls to the one-/two-argument forms of llvm.va_start,
// llvm.va_end and llvm.va_copy are replaced with calls to new declarations
// plus explicit loads/stores.
// NOTE(review): some lines of this function (continuation lines of the
// getOrInsertFunction calls, closing braces, anything done with the old
// functions after their calls are erased, and the final return) are not
// visible in this listing -- confirm against the full file.
175 static ModuleProvider *CheckVarargs(ModuleProvider *MP) {
176 Module *M = MP->getModule();
178 // Check to see if va_start takes arguments...
179 Function *F = M->getNamedFunction("llvm.va_start");
180 if (F == 0) return MP; // No varargs use, just return.
182 if (F->getFunctionType()->getNumParams() == 0)
183 return MP; // Modern varargs processing, just return.
185 // If we get to this point, we know that we have an old-style module.
186 // Materialize the whole thing to perform the rewriting.
187 MP->materializeModule();
189 // If the user is making use of obsolete varargs intrinsics, adjust them for
// --- llvm.va_start: old form takes one pointer argument. ---
191 if (Function *F = M->getNamedFunction("llvm.va_start")) {
192 assert(F->asize() == 1 && "Obsolete va_start takes 1 argument!");
// The new declaration's return type is the pointee type of the old parameter.
194 const Type *RetTy = F->getFunctionType()->getParamType(0);
195 RetTy = cast<PointerType>(RetTy)->getElementType();
// The trailing 0 presumably terminates a variadic list of parameter types,
// i.e. the new declaration takes no parameters -- TODO confirm.
196 Function *NF = M->getOrInsertFunction("llvm.va_start", RetTy, 0);
// I is advanced inside the condition, before the current call is erased, so
// the iterator never refers to the use being removed.
198 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
199 if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
// Call the new declaration, store its result through the old call's
// operand 1, then erase the old call. CI is passed to both new instructions
// (presumably as the insert-before position -- TODO confirm).
200 Value *V = new CallInst(NF, "", CI);
201 new StoreInst(V, CI->getOperand(1), CI);
202 CI->getParent()->getInstList().erase(CI);
// --- llvm.va_end: old form takes one pointer argument. ---
207 if (Function *F = M->getNamedFunction("llvm.va_end")) {
208 assert(F->asize() == 1 && "Obsolete va_end takes 1 argument!");
// ArgTy becomes the pointee type of the old parameter; its use in the
// declaration below is on a line not visible here.
209 const Type *ArgTy = F->getFunctionType()->getParamType(0);
210 ArgTy = cast<PointerType>(ArgTy)->getElementType();
211 Function *NF = M->getOrInsertFunction("llvm.va_end", Type::VoidTy,
// Same advance-before-erase iteration as above.
214 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
215 if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
// Load through the old call's operand 1, pass the loaded value to the new
// declaration, then erase the old call.
216 Value *V = new LoadInst(CI->getOperand(1), "", CI);
217 new CallInst(NF, V, "", CI);
218 CI->getParent()->getInstList().erase(CI);
// --- llvm.va_copy: old form takes two arguments. ---
223 if (Function *F = M->getNamedFunction("llvm.va_copy")) {
224 assert(F->asize() == 2 && "Obsolete va_copy takes 2 argument!");
// The new declaration's return type is the pointee type of the old first
// parameter.
225 const Type *ArgTy = F->getFunctionType()->getParamType(0);
226 ArgTy = cast<PointerType>(ArgTy)->getElementType();
227 Function *NF = M->getOrInsertFunction("llvm.va_copy", ArgTy,
// Same advance-before-erase iteration as above.
230 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
231 if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
// Call the new declaration with the old call's operand 2, store the result
// through operand 1, then erase the old call.
232 Value *V = new CallInst(NF, CI->getOperand(2), "", CI);
233 new StoreInst(V, CI->getOperand(1), CI);
234 CI->getParent()->getInstList().erase(CI);
241 //===----------------------------------------------------------------------===//
243 //===----------------------------------------------------------------------===//
245 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
// Creates a heap-allocated BytecodeBufferReader over the given bytes,
// forwarding ModuleID and the handler H.
// NOTE(review): the return-type line, the Length parameter line and the
// expression that wraps the `new` below are not visible in this listing --
// confirm against the full file.
248 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
250 const std::string &ModuleID,
251 BytecodeHandler* H ) {
253 new BytecodeBufferReader(Buffer, Length, ModuleID, H));
256 /// ParseBytecodeBuffer - Parse a given bytecode buffer
// Parses the bytes in Buffer[0..Length) and returns the resulting Module.
//   ModuleID: identifier forwarded to the module provider.
//   ErrorStr: if non-null, receives the message of a caught std::string
//             exception.
// NOTE(review): the `try {` line and the value returned after an error are
// not visible in this listing -- confirm against the full file.
258 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
259 const std::string &ModuleID,
260 std::string *ErrorStr){
// The auto_ptr owns the provider for the duration of this call; the module
// itself is obtained from releaseModule() and returned to the caller.
262 std::auto_ptr<ModuleProvider>
263 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID));
264 return AMP->releaseModule();
265 } catch (std::string &err) {
266 if (ErrorStr) *ErrorStr = err;
271 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
// Creates a module provider for the named input and passes it through
// CheckVarargs before returning it.
//   Filename: a file name, or "-" to read from standard input.
//   H:        handler forwarded to the reader that is created.
273 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename,
274 BytecodeHandler* H) {
275 if (Filename != std::string("-")) // Read from a file...
276 return CheckVarargs(new BytecodeFileReader(Filename,H));
277 else // Read from stdin
278 return CheckVarargs(new BytecodeStdinReader(H));
281 /// ParseBytecodeFile - Parse the given bytecode file
// Parses the bytecode named by Filename and returns the resulting Module.
//   Filename: forwarded to getBytecodeModuleProvider (where "-" selects
//             standard input).
//   ErrorStr: if non-null, receives the message of a caught std::string
//             exception.
// NOTE(review): the `try {` line and the value returned after an error are
// not visible in this listing -- confirm against the full file.
283 Module *llvm::ParseBytecodeFile(const std::string &Filename,
284 std::string *ErrorStr) {
// The auto_ptr owns the provider for the duration of this call; the module
// itself is obtained from releaseModule() and returned to the caller.
286 std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename));
287 return AMP->releaseModule();
288 } catch (std::string &err) {
289 if (ErrorStr) *ErrorStr = err;
294 // AnalyzeBytecodeFile - analyze one file
// Loads the named bytecode file with an analyzer handler attached and returns
// the resulting Module. A caught std::string exception is copied into
// *ErrorStr when ErrorStr is non-null.
// NOTE(review): the end of the parameter list, the `try {` line and the value
// returned after an error are not visible in this listing -- confirm against
// the full file.
295 Module* llvm::AnalyzeBytecodeFile(
296 const std::string &Filename, ///< File to analyze
297 BytecodeAnalysis& bca, ///< Statistical output
298 std::string *ErrorStr, ///< Error output
299 std::ostream* output ///< Dump output
// The handler is built from bca and output and handed to the provider.
303 BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output);
304 std::auto_ptr<ModuleProvider> AMP(
305 getBytecodeModuleProvider(Filename,analyzerHandler));
306 return AMP->releaseModule();
307 } catch (std::string &err) {
308 if (ErrorStr) *ErrorStr = err;
313 // AnalyzeBytecodeBuffer - analyze a buffer
// Loads bytecode from a memory buffer with an analyzer handler attached and
// returns the resulting Module. A caught std::string exception is copied into
// *ErrorStr when ErrorStr is non-null.
// NOTE(review): the end of the parameter list, the `try {` line and the value
// returned after an error are not visible in this listing -- confirm against
// the full file.
314 Module* llvm::AnalyzeBytecodeBuffer(
315 const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
316 unsigned Length, ///< Size of the bytecode buffer
317 const std::string& ModuleID, ///< Identifier for the module
318 BytecodeAnalysis& bca, ///< The results of the analysis
319 std::string* ErrorStr, ///< Errors, if any.
320 std::ostream* output ///< Dump output, if any
// The handler is built from bca and output and handed to the provider.
324 BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
325 std::auto_ptr<ModuleProvider>
326 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr));
327 return AMP->releaseModule();
328 } catch (std::string &err) {
329 if (ErrorStr) *ErrorStr = err;
// Loads the bytecode named by fname and copies the module's library list
// into deplibs.
// NOTE(review): the `try {` line, any cleanup of M, the error path and the
// returned bool values are not visible in this listing -- confirm against
// the full file.
334 bool llvm::GetBytecodeDependentLibraries(const std::string &fname,
335 Module::LibraryListType& deplibs) {
337 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname));
// The module is obtained with releaseModule(), unlike GetBytecodeSymbols,
// which uses materializeModule().
338 Module* M = AMP->releaseModule();
340 deplibs = M->getLibraries();
// Appends to `symbols` the names of the module's global variables that have
// an initializer and of its functions that are not external.
// NOTE(review): a line between each name construction and the matching
// push_back is not visible in this listing and may filter further -- confirm
// against the full file.
350 void getSymbols(Module*M, std::vector<std::string>& symbols) {
351 // Loop over global variables
352 for (Module::giterator GI = M->gbegin(), GE=M->gend(); GI != GE; ++GI) {
// Globals without an initializer are skipped.
353 if (GI->hasInitializer()) {
354 std::string name ( GI->getName() );
356 symbols.push_back(name);
361 //Loop over functions
362 for (Module::iterator FI = M->begin(), FE=M->end(); FI != FE; ++FI) {
// Functions for which isExternal() is true are skipped.
363 if (!FI->isExternal()) {
364 std::string name ( FI->getName() );
366 symbols.push_back(name);
373 // Get just the externally visible defined symbols from the bytecode
// Loads the bytecode file named by fName and appends its defined symbol
// names to `symbols` via getSymbols().
// NOTE(review): the `try {` line, the error path and the returned bool values
// are not visible in this listing -- confirm against the full file.
374 bool llvm::GetBytecodeSymbols(const sys::Path& fName,
375 std::vector<std::string>& symbols) {
// The auto_ptr owns the provider for the duration of this call.
377 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fName.get()));
379 // Get the module from the provider
// materializeModule() is used here, not releaseModule().
380 Module* M = AMP->materializeModule();
383 getSymbols(M, symbols);
385 // Done with the module
394 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
395 const std::string& ModuleID,
396 std::vector<std::string>& symbols) {
398 ModuleProvider* MP = 0;
400 // Get the module provider
401 MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID);
403 // Get the module from the provider
404 Module* M = MP->materializeModule();
407 getSymbols(M, symbols);
409 // Done with the module. Note that ModuleProvider will delete the
410 // Module when it is deleted. Also note that it's the caller's responsibility
411 // to delete the ModuleProvider.
415 // We delete only the ModuleProvider here because its destructor will
416 // also delete the Module (we used materializeModule not releaseModule).