1 //===- gccld.cpp - LLVM 'ld' compatible linker ----------------------------===//
3 // This utility is intended to be compatible with GCC, and follows standard
4 // system 'ld' conventions. As such, the default output file is ./a.out.
5 // Additionally, this program outputs a shell script that is used to invoke LLI
6 // to execute the program. In this manner, the generated executable (a.out for
7 // example), is directly executable, whereas the bytecode file actually lives in
8 // the a.out.bc file generated by this program. Also, Force is on by default.
10 // Note that if someone (or a script) deletes the executable program generated,
11 // the .bc file will be left around. Considering that this is a temporary hack,
12 // I'm not too worried about this.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Utils/Linker.h"
17 #include "llvm/Module.h"
18 #include "llvm/PassManager.h"
19 #include "llvm/Bytecode/Reader.h"
20 #include "llvm/Bytecode/WriteBytecodePass.h"
21 #include "llvm/Transforms/IPO.h"
22 #include "llvm/Transforms/Scalar.h"
23 #include "Support/CommandLine.h"
24 #include "Support/Signals.h"
29 #include <sys/types.h> // For FileExists
// Command-line options.
// NOTE(review): the line that declares each option's type precedes its
// constructor call in the original file and is not visible in this excerpt;
// the comments below rely on the description strings and on how each option
// is used later in this file.
//
// Positional arguments: the input bytecode files to link.
34 InputFilenames(cl::Positional, cl::desc("<input bytecode files>"),
// -o: output filename, initialized to "a.out".
38 OutputFilename("o", cl::desc("Override output filename"), cl::init("a.out"),
39 cl::value_desc("filename"));
// -v: tested before each progress message written to std::cerr.
42 Verbose("v", cl::desc("Print information about actions taken"));
// -L<directory>: library search paths, tried in order by LoadLibrary.
45 LibPaths("L", cl::desc("Specify a library search path"), cl::Prefix,
46 cl::value_desc("directory"));
// -l<name>: libraries to link in after all of the input files.
49 Libraries("l", cl::desc("Specify libraries to link to"), cl::Prefix,
50 cl::value_desc("library prefix"));
// -s: strip symbol information from the result.
53 Strip("s", cl::desc("Strip symbol info from executable"));
// -disable-internalize: when set, main() does not add the internalize pass.
56 NoInternalize("disable-internalize",
57 cl::desc("Do not mark all symbols as internal"));
// -link-as-library: write bytecode directly to the output filename and do not
// emit the launcher shell script.
60 LinkAsLibrary("link-as-library", cl::desc("Link the .bc files together as a"
61 " library, not an executable"));
63 // Compatibility options that are ignored, but supported by LD
// These are hidden (cl::Hidden) and are not read anywhere in the visible code.
65 CO3("soname", cl::Hidden, cl::desc("Compatibility option: ignored"));
67 CO4("version-script", cl::Hidden, cl::desc("Compatibility option: ignored"));
69 CO5("eh-frame-hdr", cl::Hidden, cl::desc("Compatibility option: ignored"));
72 // FileExists - Return true if the specified string is an openable file...
// The result is true whenever stat() on FN returns something other than -1.
// NOTE(review): a successful stat() only shows that the path can be stat'ed
// (a directory also qualifies, and read permission on the file itself is not
// checked), which is weaker than the "openable" wording above -- confirm that
// this is acceptable for the callers.
73 static inline bool FileExists(const std::string &FN) {
75 return stat(FN.c_str(), &StatBuf) != -1;
79 // LoadObject - Read the specified "object file", which should not search the
80 // library path to find it.
//
// FN is the path to load, used exactly as given.
// OutErrorMessage receives a description of the problem when loading fails.
// Returns an auto_ptr owning the parsed Module on success, or an empty
// auto_ptr (after setting OutErrorMessage) on failure.
81 static inline std::auto_ptr<Module> LoadObject(const std::string &FN,
82 std::string &OutErrorMessage) {
83 if (Verbose) std::cerr << "Loading '" << FN << "'\n";
// A missing file is reported separately from a file that fails to parse.
84 if (!FileExists(FN)) {
85 OutErrorMessage = "could not find input file '" + FN + "'!";
86 return std::auto_ptr<Module>();
89 std::string ErrorMessage;
90 Module *Result = ParseBytecodeFile(FN, &ErrorMessage);
91 if (Result) return std::auto_ptr<Module>(Result);
// Parsing failed: append the parser's own message when it supplied one.
93 OutErrorMessage = "Bytecode file '" + FN + "' corrupt!";
94 if (ErrorMessage.size()) OutErrorMessage += ": " + ErrorMessage;
95 return std::auto_ptr<Module>();
// LoadSingleLibraryObject - Load Filename through LoadObject.  A load failure
// is reported on std::cerr only when Verbose is set; otherwise it is silent.
// NOTE(review): the remainder of this function, including its return
// statement, is not visible in this excerpt.  Callers in this file test the
// returned pointer for null before using it.
99 static Module *LoadSingleLibraryObject(const std::string &Filename) {
100 std::string ErrorMessage;
101 std::auto_ptr<Module> M = LoadObject(Filename, ErrorMessage);
102 if (M.get() == 0 && Verbose) {
103 std::cerr << "Error loading '" + Filename + "'";
104 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
112 // LoadLibraryFromDirectory - This looks for a .a, .so, or .bc file in a
113 // particular directory. It returns true if no library is found, otherwise it
114 // puts the loaded modules into the Objects list, and sets isArchive to true if
115 // a .a file was loaded.
//
// Candidates are probed in the order lib<LibName>.a, lib<LibName>.so,
// lib<LibName>.bc.  Each path is formed by appending directly to Directory, so
// Directory must be empty or already end with a path separator.
117 static inline bool LoadLibraryFromDirectory(const std::string &LibName,
118 const std::string &Directory,
119 std::vector<Module*> &Objects,
121 if (FileExists(Directory + "lib" + LibName + ".a")) {
122 std::string ErrorMessage;
// NOTE(review): this trace omits the "lib" prefix that the path handed to
// ReadArchiveFile below includes, so the printed name is not the file read.
123 if (Verbose) std::cerr << "Loading '" << Directory << LibName << ".a'\n";
// The result of ReadArchiveFile is negated: a false return is the success path.
124 if (!ReadArchiveFile(Directory + "lib" + LibName + ".a", Objects,
125 &ErrorMessage)) { // Read the archive file
127 return false; // Success!
131 std::cerr << "Error loading archive '" + Directory + "lib"+LibName+".a'";
132 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
// The .so and .bc candidates are each loaded as one module and appended to
// Objects when the load yields a non-null pointer.
137 if (FileExists(Directory + "lib" + LibName + ".so"))
138 if (Module *M = LoadSingleLibraryObject(Directory + "lib" + LibName+".so")){
140 Objects.push_back(M);
144 if (FileExists(Directory + "lib" + LibName + ".bc"))
145 if (Module *M = LoadSingleLibraryObject(Directory + "lib" + LibName+".bc")){
147 Objects.push_back(M);
153 // LoadLibrary - This searches for a .a, .so, or .bc file which provides the
154 // LLVM bytecode for the library. It returns true if no library is found,
155 // otherwise it puts the loaded modules into the Objects list, and sets
156 // isArchive to true if a .a file was loaded.
//
// The search starts with an empty Directory string and then takes each entry
// of LibPaths in order, with "/" appended.
// NOTE(review): the loop header and the return statements are not visible in
// this excerpt; ErrorMessage is presumably filled in only after every
// candidate directory has failed -- confirm against the full file.
158 static inline bool LoadLibrary(const std::string &LibName,
159 std::vector<Module*> &Objects, bool &isArchive,
160 std::string &ErrorMessage) {
161 std::string Directory;
162 unsigned NextLibPathIdx = 0;
165 // Try loading from the current directory...
166 if (Verbose) std::cerr << " Looking in directory '" << Directory << "'\n";
// LoadLibraryFromDirectory returns false when it found and loaded something.
167 if (!LoadLibraryFromDirectory(LibName, Directory, Objects, isArchive))
// Stop once every -L path has been tried; otherwise move on to the next one.
170 if (NextLibPathIdx == LibPaths.size()) break;
171 Directory = LibPaths[NextLibPathIdx++]+"/";
174 ErrorMessage = "error linking library '-l" + LibName+ "': library not found!";
// GetAllDefinedSymbols - Insert into DefinedSymbols the name of every element
// of M that has a name, is not external, and does not have internal linkage.
// Two ranges are scanned: begin()..end() and gbegin()..gend() (presumably the
// module's functions and its global variables respectively).  DefinedSymbols
// is only added to; anything already in the set is kept.
178 static void GetAllDefinedSymbols(Module *M,
179 std::set<std::string> &DefinedSymbols) {
180 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
181 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
182 DefinedSymbols.insert(I->getName());
183 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
184 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
185 DefinedSymbols.insert(I->getName());
188 // GetAllUndefinedSymbols - This calculates the set of undefined symbols that
189 // still exist in an LLVM module. This is a bit tricky because there may be two
190 // symbols with the same name, but different LLVM types that will be resolved to
191 // each other, but aren't currently (thus we need to treat it as resolved).
//
// UndefinedSymbols is cleared on entry, so on return it holds only the result
// computed for M.  Names are first sorted into an undefined set and a local
// defined set; any name present in both is then removed from the undefined set.
// NOTE(review): the conditions that select the "undefined" branch in the two
// loops below are not visible in this excerpt.
193 static void GetAllUndefinedSymbols(Module *M,
194 std::set<std::string> &UndefinedSymbols) {
195 std::set<std::string> DefinedSymbols;
196 UndefinedSymbols.clear(); // Start out empty
198 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
201 UndefinedSymbols.insert(I->getName());
202 else if (!I->hasInternalLinkage())
203 DefinedSymbols.insert(I->getName());
205 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
208 UndefinedSymbols.insert(I->getName());
209 else if (!I->hasInternalLinkage())
210 DefinedSymbols.insert(I->getName());
213 // Prune out any defined symbols from the undefined symbols set...
// The loop has no increment clause: erase(I++) advances I before the erased
// element is invalidated, and the other branch advances I explicitly.
214 for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
215 I != UndefinedSymbols.end(); )
216 if (DefinedSymbols.count(*I))
217 UndefinedSymbols.erase(I++); // This symbol really is defined!
219 ++I; // Keep this symbol in the undefined symbols list
// LinkLibrary - Load the library LibName and link into M each of its modules
// that defines a symbol M still leaves undefined.  Returns true when the
// library cannot be loaded or when linking a module fails; ErrorMessage is
// handed to LoadLibrary and LinkModules so that they can describe the failure.
// NOTE(review): the return on the success path is not visible in this excerpt.
223 static bool LinkLibrary(Module *M, const std::string &LibName,
224 std::string &ErrorMessage) {
225 std::vector<Module*> Objects;
227 if (LoadLibrary(LibName, Objects, isArchive, ErrorMessage)) return true;
229 // Figure out which symbols are defined by all of the modules in the .a file
// DefinedSymbols[i] is kept parallel to Objects[i] for the rest of the function.
230 std::vector<std::set<std::string> > DefinedSymbols;
231 DefinedSymbols.resize(Objects.size());
232 for (unsigned i = 0; i != Objects.size(); ++i)
233 GetAllDefinedSymbols(Objects[i], DefinedSymbols[i]);
235 std::set<std::string> UndefinedSymbols;
236 GetAllUndefinedSymbols(M, UndefinedSymbols);
// Linking one module can introduce new undefined symbols that another module
// satisfies, so the scan repeats for as long as Linked is set; it is set again
// below each time a module is linked in.
239 while (Linked) { // While we are linking in object files, loop.
242 for (unsigned i = 0; i != Objects.size(); ++i) {
243 // Consider whether we need to link in this module... we only need to
244 // link it in if it defines some symbol which is so far undefined.
246 const std::set<std::string> &DefSymbols = DefinedSymbols[i];
248 bool ObjectRequired = false;
249 for (std::set<std::string>::iterator I = UndefinedSymbols.begin(),
250 E = UndefinedSymbols.end(); I != E; ++I)
251 if (DefSymbols.count(*I)) {
253 std::cerr << " Found object providing symbol '" << *I << "'...\n";
254 ObjectRequired = true;
258 // We DO need to link this object into the program...
259 if (ObjectRequired) {
260 if (LinkModules(M, Objects[i], &ErrorMessage))
261 return true; // Couldn't link in the right object file...
263 // Since we have linked in this object, delete it from the list of
264 // objects to consider in this archive file.
// Swap-with-last removal: the entry moved into slot i has not been examined
// yet, which is why i is decremented below.
265 std::swap(Objects[i], Objects.back());
266 std::swap(DefinedSymbols[i], DefinedSymbols.back());
268 DefinedSymbols.pop_back();
269 --i; // Do not skip an entry
271 // The undefined symbols set should have shrunk.
272 GetAllUndefinedSymbols(M, UndefinedSymbols);
273 Linked = true; // We have linked something in!
// PrintAndReturn - Write "<progname><Extra>: <Message>" followed by a newline
// to std::cerr.  main() returns this function's result directly as its own
// return value.
// NOTE(review): the return statement is not visible in this excerpt.
281 static int PrintAndReturn(const char *progname, const std::string &Message,
282 const std::string &Extra = "") {
283 std::cerr << progname << Extra << ": " << Message << "\n";
// main - Link the input bytecode files and any -l libraries into one module,
// queue the cleanup passes plus a bytecode writer, and run them.  Unless
// -link-as-library is given, the bytecode goes to "<output>.bc" and "<output>"
// itself becomes an executable shell script that runs the bytecode with lli.
// Every failure path visible here returns through PrintAndReturn.
288 int main(int argc, char **argv) {
289 cl::ParseCommandLineOptions(argc, argv, " llvm linker for GCC\n");
// The first input file becomes the module that everything else is linked into.
291 std::string ErrorMessage;
292 std::auto_ptr<Module> Composite(LoadObject(InputFilenames[0], ErrorMessage));
293 if (Composite.get() == 0)
294 return PrintAndReturn(argv[0], ErrorMessage);
// Link each remaining input file into Composite, in command-line order.
296 for (unsigned i = 1; i < InputFilenames.size(); ++i) {
297 std::auto_ptr<Module> M(LoadObject(InputFilenames[i], ErrorMessage));
299 return PrintAndReturn(argv[0], ErrorMessage);
301 if (Verbose) std::cerr << "Linking in '" << InputFilenames[i] << "'\n";
303 if (LinkModules(Composite.get(), M.get(), &ErrorMessage))
304 return PrintAndReturn(argv[0], ErrorMessage,
305 ": error linking in '" + InputFilenames[i] + "'");
308 // Link in all of the libraries next...
309 for (unsigned i = 0; i != Libraries.size(); ++i) {
310 if (Verbose) std::cerr << "Linking in library: -l" << Libraries[i] << "\n";
311 if (LinkLibrary(Composite.get(), Libraries[i], ErrorMessage))
312 return PrintAndReturn(argv[0], ErrorMessage);
315 // In addition to just linking the input from GCC, we also want to spiff it up
316 // a little bit. Do this now.
// NOTE(review): the declaration of Passes and the conditions guarding some of
// the Passes.add() calls below are not visible in this excerpt.
320 // Linking modules together can lead to duplicated global constants, only keep
321 // one copy of each constant...
323 Passes.add(createConstantMergePass());
325 // If the -s command line option was specified, strip the symbols out of the
326 // resulting program to make it smaller. -s is a GCC option that we are
330 Passes.add(createSymbolStrippingPass());
332 // Often if the programmer does not specify proper prototypes for the
333 // functions they are calling, they end up calling a vararg version of the
334 // function that does not get a body filled in (the real function has typed
335 // arguments). This pass merges the two functions.
337 Passes.add(createFunctionResolvingPass());
339 if (!NoInternalize) {
340 // Now that composite has been compiled, scan through the module, looking
341 // for a main function. If main is defined, mark all other functions
344 Passes.add(createInternalizePass());
347 // Now that we have optimized the program, discard unreachable functions...
349 Passes.add(createGlobalDCEPass());
351 // Add the pass that writes bytecode to the output file...
// When linking an executable the bytecode goes to "<output>.bc", because the
// plain output name is used for the launcher script written further down.
352 std::string RealBytecodeOutput = OutputFilename;
353 if (!LinkAsLibrary) RealBytecodeOutput += ".bc";
354 std::ofstream Out(RealBytecodeOutput.c_str());
356 return PrintAndReturn(argv[0], "error opening '" + RealBytecodeOutput +
358 Passes.add(new WriteBytecodePass(&Out)); // Write bytecode to file...
360 // Make sure that the Out file gets unlink'd from the disk if we get a SIGINT
361 RemoveFileOnSignal(RealBytecodeOutput);
363 // Run our queue of passes all at once now, efficiently.
364 Passes.run(*Composite.get());
367 if (!LinkAsLibrary) {
368 // Output the script to start the program...
369 std::ofstream Out2(OutputFilename.c_str());
371 return PrintAndReturn(argv[0], "error opening '" + OutputFilename +
// NOTE(review): in the generated script "$0.bc" and "$*" are unquoted, so a
// script path or an argument containing whitespace is re-split by the shell;
// a quoted "$0.bc" "$@" would preserve them.  Left unchanged here.
373 Out2 << "#!/bin/sh\nlli -q -abort-on-exception $0.bc $*\n";
376 // Make the script executable...
// NOTE(review): the result of chmod() is not checked.
377 chmod(OutputFilename.c_str(), 0755);