1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the Archive class and its Child, Symbol and
// ArchiveMemberHeader helpers.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/MemoryBuffer.h"
22 using namespace object;
// Signature strings looked for at the very start of the buffer. The Archive
// constructor accepts a buffer that starts with either one, and child_begin()
// uses strlen(Magic) to step over the signature.
24 static const char *const Magic = "!<arch>\n";
25 static const char *const ThinMagic = "!<thin>\n";
// Out-of-line definition with an intentionally empty body.
// NOTE(review): presumably exists only to anchor Archive's vtable in this
// file -- confirm against the declaration in Archive.h.
27 void Archive::anchor() { }
// Returns the member name held in the fixed-width Name field, cut off at the
// first occurrence of the EndCond character. Names whose first character is
// '/' or '#' are treated differently when EndCond is chosen.
// NOTE(review): the declaration and assignments of EndCond, and the statement
// executed when find() returns npos, are not visible in this excerpt.
29 StringRef ArchiveMemberHeader::getName() const {
31 if (Name[0] == '/' || Name[0] == '#')
35 llvm::StringRef::size_type end =
36 llvm::StringRef(Name, sizeof(Name)).find(EndCond);
37 if (end == llvm::StringRef::npos)
// The resulting name must be non-empty and must fit inside the Name field.
39 assert(end <= sizeof(Name) && end > 0);
40 // Don't include the EndCond if there is one.
41 return llvm::StringRef(Name, end);
// Parses the Size field (trailing spaces trimmed) as a base-10 integer into
// Ret. A field that fails to parse reaches llvm_unreachable rather than being
// reported to the caller.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this excerpt.
44 uint32_t ArchiveMemberHeader::getSize() const {
46 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
47 llvm_unreachable("Size is not a decimal number.");
// Parses the AccessMode field (trailing spaces trimmed) as a base-8 integer
// and returns it cast to sys::fs::perms. A field that fails to parse reaches
// llvm_unreachable rather than being reported to the caller.
51 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
53 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
54 llvm_unreachable("Access mode is not an octal number.");
55 return static_cast<sys::fs::perms>(Ret);
// Parses the LastModified field (trailing spaces trimmed) as a base-10 number
// of seconds and feeds it to Ret.fromEpochTime(). A field that fails to parse
// reaches llvm_unreachable.
// NOTE(review): the declarations of Seconds and Ret and the return statement
// are not visible in this excerpt.
58 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
60 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
61 .getAsInteger(10, Seconds))
62 llvm_unreachable("Last modified time not a decimal number.");
65 Ret.fromEpochTime(Seconds);
// Parses the UID field (trailing spaces trimmed) as a base-10 integer into
// Ret. A field that fails to parse reaches llvm_unreachable.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this excerpt.
69 unsigned ArchiveMemberHeader::getUID() const {
71 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
72 llvm_unreachable("UID time not a decimal number.");
// Parses the GID field (trailing spaces trimmed) as a base-10 integer into
// Ret. A field that fails to parse reaches llvm_unreachable.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this excerpt.
76 unsigned ArchiveMemberHeader::getGID() const {
78 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
79 llvm_unreachable("GID time not a decimal number.");
// Builds a Child describing the member whose header begins at Start.
// Data always spans the header; the Header->getSize() bytes that follow are
// included too unless the parent is a thin archive and the member is neither
// "/" nor "//". StartOfFile is the offset within Data at which the member's
// contents begin: just after the header, plus the length of an attached
// "#1/<len>" name when there is one.
// NOTE(review): the initializer list and any handling of a null Start (used
// by child_end() and getNext()) are not visible in this excerpt.
83 Archive::Child::Child(const Archive *Parent, const char *Start)
88 const ArchiveMemberHeader *Header =
89 reinterpret_cast<const ArchiveMemberHeader *>(Start);
90 uint64_t Size = sizeof(ArchiveMemberHeader);
91 if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
92 Size += Header->getSize();
93 Data = StringRef(Start, Size);
95 // Setup StartOfFile and PaddingBytes.
96 StartOfFile = sizeof(ArchiveMemberHeader);
97 // Don't include attached name.
98 StringRef Name = Header->getName();
99 if (Name.startswith("#1/")) {
// The digits after "#1/" give the length of the name stored after the header.
101 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
102 llvm_unreachable("Long name length is not an integer");
103 StartOfFile += NameSize;
// Returns the size of the member's contents. Two results are possible: the
// header's Size field, or the number of bytes in Data past StartOfFile.
// NOTE(review): the condition that selects the first return is not visible in
// this excerpt.
107 uint64_t Archive::Child::getSize() const {
109 return getHeader()->getSize();
110 return Data.size() - StartOfFile;
// Returns the value of the header's Size field, with no adjustment for an
// attached name.
113 uint64_t Archive::Child::getRawSize() const {
114 return getHeader()->getSize();
// Returns the Child that follows this one in the parent archive, or a Child
// built from a null start pointer (the same value child_end() constructs)
// when the next location is at or beyond the end of the parent's buffer.
// NOTE(review): the statement that rounds SpaceToSkip up to an even value is
// not visible in this excerpt.
117 Archive::Child Archive::Child::getNext() const {
118 size_t SpaceToSkip = Data.size();
119 // If it's odd, add 1 to make it even.
123 const char *NextLoc = Data.data() + SpaceToSkip;
125 // Check to see if this is past the end of the archive.
126 if (NextLoc >= Parent->Data.getBufferEnd())
127 return Child(Parent, nullptr);
129 return Child(Parent, NextLoc);
// Computes the distance in bytes from the start of the parent archive's
// buffer to the start of this member's Data (i.e. to its header).
// NOTE(review): the return statement is not visible in this excerpt;
// presumably offset is what gets returned.
132 uint64_t Archive::Child::getChildOffset() const {
133 const char *a = Parent->Data.getBuffer().data();
134 const char *c = Data.data();
135 uint64_t offset = c - a;
// Resolves the member's name from its raw header name. Three forms are
// handled: names starting with '/' (the special "/" and "//" members, or a
// decimal offset into the parent's string table), names starting with "#1/"
// (the name is stored directly after the header), and simple names, from
// which one trailing '/' is dropped.
// Returns object_error::parse_failed when a string-table reference lies
// outside the table; numbers that fail to parse reach llvm_unreachable.
// NOTE(review): several statements (the returns for the special members, the
// declarations of offset and name_size, the final return) are not visible in
// this excerpt.
139 ErrorOr<StringRef> Archive::Child::getName() const {
140 StringRef name = getRawName();
141 // Check if it's a special name.
142 if (name[0] == '/') {
143 if (name.size() == 1) // Linker member.
145 if (name.size() == 2 && name[1] == '/') // String table.
// Otherwise the digits after '/' are an offset into the string table.
150 if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
151 llvm_unreachable("Long name offset is not an integer");
// NOTE(review): addr is computed through Parent->StringTable before the
// comparison against child_end() below.
152 const char *addr = Parent->StringTable->Data.begin()
153 + sizeof(ArchiveMemberHeader)
// Reject a missing string table or an address outside the table's contents.
// NOTE(review): the upper bound uses '>', so an address equal to the end of
// the table passes this check.
156 if (Parent->StringTable == Parent->child_end()
157 || addr < (Parent->StringTable->Data.begin()
158 + sizeof(ArchiveMemberHeader))
159 || addr > (Parent->StringTable->Data.begin()
160 + sizeof(ArchiveMemberHeader)
161 + Parent->StringTable->getSize()))
162 return object_error::parse_failed;
164 // GNU long file names end with a /.
165 if (Parent->kind() == K_GNU) {
// NOTE(review): End is used without being compared against StringRef::npos.
166 StringRef::size_type End = StringRef(addr).find('/');
167 return StringRef(addr, End);
169 return StringRef(addr);
170 } else if (name.startswith("#1/")) {
// The digits after "#1/" give the length of the name that follows the header;
// trailing NUL bytes are trimmed from it.
172 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
173 llvm_unreachable("Long name length is not an ingeter");
174 return Data.substr(sizeof(ArchiveMemberHeader), name_size)
175 .rtrim(StringRef("\0", 1));
177 // It's a simple name.
178 if (name[name.size() - 1] == '/')
179 return name.substr(0, name.size() - 1);
// Wraps the member's contents (getBuffer()) and its resolved name (getName())
// in a MemoryBufferRef. The error code from getName() is checked first.
// NOTE(review): the statement taken when getName() fails is not visible in
// this excerpt; presumably EC is returned.
183 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
184 ErrorOr<StringRef> NameOrErr = getName();
185 if (std::error_code EC = NameOrErr.getError())
187 StringRef Name = NameOrErr.get();
188 return MemoryBufferRef(getBuffer(), Name);
// Obtains this member's MemoryBufferRef and passes it, together with Context,
// to createBinary(). The error code from getMemoryBufferRef() is checked
// first.
// NOTE(review): the statement taken on that error is not visible in this
// excerpt; presumably EC is returned.
191 ErrorOr<std::unique_ptr<Binary>>
192 Archive::Child::getAsBinary(LLVMContext *Context) const {
193 ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
194 if (std::error_code EC = BuffOrErr.getError())
197 return createBinary(BuffOrErr.get(), Context);
// Factory: constructs an Archive over Source, giving the constructor EC as
// its error out-parameter, and hands ownership of the new object to the
// caller.
// NOTE(review): the declaration of EC and the check made on it after
// construction are not visible in this excerpt.
200 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
202 std::unique_ptr<Archive> Ret(new Archive(Source, EC));
205 return std::move(Ret);
// Constructs an Archive over Source and reports the outcome through ec.
// The buffer must start with ThinMagic or Magic, otherwise ec becomes
// object_error::invalid_file_type. The constructor then inspects the raw
// names of the leading members (see the format notes below) and sets ec to
// object_error::success or object_error::parse_failed accordingly.
// SymbolTable starts out equal to child_end().
// NOTE(review): many statements between the visible lines (format selection,
// iterator advances, early returns, closing braces) are missing from this
// excerpt, so the exact control flow cannot be read from here.
208 Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
209 : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
210 StringRef Buffer = Data.getBuffer();
211 // Check for sufficient magic.
212 if (Buffer.startswith(ThinMagic)) {
214 } else if (Buffer.startswith(Magic)) {
217 ec = object_error::invalid_file_type;
221 // Get the special members.
222 child_iterator i = child_begin(false);
223 child_iterator e = child_end();
226 ec = object_error::success;
230 StringRef Name = i->getRawName();
232 // Below is the pattern that is used to figure out the archive format
233 // GNU archive format
234 // First member : / (may exist, if it exists, points to the symbol table )
235 // Second member : // (may exist, if it exists, points to the string table)
236 // Note : The string table is used if the filename exceeds 15 characters
237 // BSD archive format
238 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
239 // There is no string table, if the filename exceeds 15 characters or has an
240 // embedded space, the filename has #1/<size>, The size represents the size
241 // of the filename that needs to be read after the archive header
242 // COFF archive format
244 // Second member : / (provides a directory of symbols)
245 // Third member : // (may exist, if it exists, contains the string table)
246 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
247 // even if the string table is empty. However, lib.exe does not in fact
248 // seem to create the third member if there's no member whose filename
249 // exceeds 15 characters. So the third member is optional.
251 if (Name == "__.SYMDEF") {
256 ec = object_error::success;
260 if (Name.startswith("#1/")) {
262 // We know this is BSD, so getName will work since there is no string table.
263 ErrorOr<StringRef> NameOrErr = i->getName();
264 ec = NameOrErr.getError();
267 Name = NameOrErr.get();
268 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
281 ec = object_error::parse_failed;
284 Name = i->getRawName();
292 ec = object_error::success;
296 if (Name[0] != '/') {
299 ec = object_error::success;
304 ec = object_error::parse_failed;
314 ec = object_error::success;
318 Name = i->getRawName();
326 ec = object_error::success;
// Returns an iterator to the first member. A buffer of exactly 8 bytes (the
// length of either signature string) is treated as an empty archive;
// otherwise the first member is located just past the signature.
// NOTE(review): the returns and any use of SkipInternal are not visible in
// this excerpt.
329 Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
330 if (Data.getBufferSize() == 8) // empty archive.
336 const char *Loc = Data.getBufferStart() + strlen(Magic);
// Returns the past-the-end iterator: a Child of this archive built from a
// null start pointer, the same value Child::getNext() constructs once it
// runs off the end of the buffer.
341 Archive::child_iterator Archive::child_end() const {
342 return Child(this, nullptr);
// Returns the symbol's name: a StringRef constructed from the character
// pointer StringIndex bytes into the symbol table member's buffer.
// NOTE(review): this relies on the name being NUL-terminated inside that
// buffer -- confirm.
345 StringRef Archive::Symbol::getName() const {
346 return Parent->SymbolTable->getBuffer().begin() + StringIndex;
// Finds the archive member that defines this symbol. The member's offset is
// read from the symbol table using a layout that depends on Parent->kind():
//   K_GNU: big-endian 32-bit offsets, indexed by SymbolIndex.
//   K_BSD: little-endian ranlib pairs; the second word of pair SymbolIndex.
//   remaining case: a little-endian member-offset table reached through a
//     table of 16-bit, 1-based indices (presumably the COFF layout -- confirm).
// A Child is then built at Parent's data + Offset.
// Returns object_error::parse_failed when SymbolIndex or the looked-up
// OffsetIndex is out of range in the last layout.
// NOTE(review): the declaration of Offset, the 'else' that opens the last
// case, parts of two expressions and the final return are not visible in
// this excerpt.
349 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
350 const char *Buf = Parent->SymbolTable->getBuffer().begin();
// Skip the leading 32-bit word of the symbol table.
351 const char *Offsets = Buf + 4;
353 if (Parent->kind() == K_GNU) {
355 *(reinterpret_cast<const support::ubig32_t *>(Offsets) + SymbolIndex);
356 } else if (Parent->kind() == K_BSD) {
357 // The SymbolIndex is an index into the ranlib structs that start at
358 // Offsets (the first uint32_t is the number of bytes of the ranlib
359 // structs). The ranlib structs are a pair of uint32_t's the first
360 // being a string table offset and the second being the offset into
361 // the archive of the member that defines the symbol. Which is what
363 Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) +
364 (SymbolIndex * 2) + 1);
// The table starts with the member count, followed by that many offsets.
366 uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
// Step over the member count and the member offsets.
369 Buf += sizeof(support::ulittle32_t) +
370 (MemberCount * sizeof(support::ulittle32_t));
372 uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
374 if (SymbolIndex >= SymbolCount)
375 return object_error::parse_failed;
377 // Skip SymbolCount to get to the indices table.
378 const char *Indices = Buf + sizeof(support::ulittle32_t);
380 // Get the index of the offset in the file member offset table for this
382 uint16_t OffsetIndex =
383 *(reinterpret_cast<const support::ulittle16_t*>(Indices)
385 // Subtract 1 since OffsetIndex is 1 based.
388 if (OffsetIndex >= MemberCount)
389 return object_error::parse_failed;
391 Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
// Offset is applied from the start of the parent archive's data.
395 const char *Loc = Parent->getData().begin() + Offset;
396 child_iterator Iter(Child(Parent, Loc));
// Produces the symbol that follows this one, working on a copy named t.
// For K_BSD archives t.StringIndex is re-based using the string-table
// offsets stored in the current and next ranlib entries; the other visible
// path moves the index to one past the next NUL byte in the symbol table
// buffer.
// NOTE(review): the declaration of t, the 'else' that opens the second path,
// the assignment target of the find() expression, the SymbolIndex increment
// and the return are not visible in this excerpt.
400 Archive::Symbol Archive::Symbol::getNext() const {
402 if (Parent->kind() == K_BSD) {
403 // t.StringIndex is an offset from the start of the __.SYMDEF or
404 // "__.SYMDEF SORTED" member into the string table for the ranlib
405 // struct indexed by t.SymbolIndex . To change t.StringIndex to the
406 // offset in the string table for t.SymbolIndex+1 we subtract
407 // its offset from the start of the string table for t.SymbolIndex
408 // and add the offset of the string table for t.SymbolIndex+1.
410 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
411 // which is the number of bytes of ranlib structs that follow. The ranlib
412 // structs are a pair of uint32_t's the first being a string table offset
413 // and the second being the offset into the archive of the member that
414 // defines the symbol. After that the next uint32_t is the byte count of
415 // the string table followed by the string table.
416 const char *Buf = Parent->SymbolTable->getBuffer().begin();
417 uint32_t RanlibCount = 0;
// The leading word is a byte count; each ranlib entry is two uint32_t's.
418 RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) /
419 (sizeof(uint32_t) * 2);
420 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
421 // don't change the t.StringIndex as we don't want to reference a ranlib
423 if (t.SymbolIndex + 1 < RanlibCount) {
424 const char *Ranlibs = Buf + 4;
425 uint32_t CurRanStrx = 0;
426 uint32_t NextRanStrx = 0;
427 CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
428 (t.SymbolIndex * 2));
429 NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
430 ((t.SymbolIndex + 1) * 2));
431 t.StringIndex -= CurRanStrx;
432 t.StringIndex += NextRanStrx;
435 // Go to one past next null.
437 Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
// Returns an iterator to the first symbol. Without a symbol table this is
// Symbol(this, 0, 0), the same value symbol_end() constructs in that case.
// Otherwise buf is advanced over the kind-specific tables at the front of the
// symbol table member, and the iterator starts at symbol index 0 with the
// resulting byte offset from the start of that member as its string index.
// NOTE(review): the 'else' that opens the last case and any statement that
// uses ran_strx are not visible in this excerpt.
443 Archive::symbol_iterator Archive::symbol_begin() const {
444 if (!hasSymbolTable())
445 return symbol_iterator(Symbol(this, 0, 0));
447 const char *buf = SymbolTable->getBuffer().begin();
448 if (kind() == K_GNU) {
449 uint32_t symbol_count = 0;
450 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
// Step over the symbol count and one uint32_t per symbol.
451 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
452 } else if (kind() == K_BSD) {
453 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
454 // which is the number of bytes of ranlib structs that follow. The ranlib
455 // structs are a pair of uint32_t's the first being a string table offset
456 // and the second being the offset into the archive of the member that
457 // defines the symbol. After that the next uint32_t is the byte count of
458 // the string table followed by the string table.
459 uint32_t ranlib_count = 0;
460 ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
461 (sizeof(uint32_t) * 2);
462 const char *ranlibs = buf + 4;
463 uint32_t ran_strx = 0;
// String-table offset stored in the first ranlib entry.
464 ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs));
465 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
466 // Skip the byte count of the string table.
467 buf += sizeof(uint32_t);
470 uint32_t member_count = 0;
471 uint32_t symbol_count = 0;
472 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
473 buf += 4 + (member_count * 4); // Skip offsets.
474 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
475 buf += 4 + (symbol_count * 2); // Skip indices.
477 uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
478 return symbol_iterator(Symbol(this, 0, string_start_offset));
// Returns the past-the-end symbol iterator. Without a symbol table this is
// Symbol(this, 0, 0). Otherwise the number of symbols is read from the symbol
// table in a kind-specific way and the result is Symbol(this, symbol_count, 0).
//   K_GNU: the leading big-endian 32-bit word.
//   K_BSD: the leading little-endian byte count divided by the size of a
//     ranlib pair (two uint32_t's).
//   remaining case: the little-endian word that follows the member offsets.
// NOTE(review): the 'else' that opens the last case is not visible in this
// excerpt.
481 Archive::symbol_iterator Archive::symbol_end() const {
482 if (!hasSymbolTable())
483 return symbol_iterator(Symbol(this, 0, 0));
485 const char *buf = SymbolTable->getBuffer().begin();
486 uint32_t symbol_count = 0;
487 if (kind() == K_GNU) {
488 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
489 } else if (kind() == K_BSD) {
490 symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
491 (sizeof(uint32_t) * 2);
493 uint32_t member_count = 0;
494 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
495 buf += 4 + (member_count * 4); // Skip offsets.
496 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
498 return symbol_iterator(Symbol(this, symbol_count, 0));
// Scans the symbols from symbol_begin() to symbol_end() in order and, for the
// first one whose name equals the requested name, returns the member obtained
// from Symbol::getMember().
// NOTE(review): the statement taken when getMember() reports an error and the
// value returned when no symbol matches are not visible in this excerpt.
501 Archive::child_iterator Archive::findSym(StringRef name) const {
502 Archive::symbol_iterator bs = symbol_begin();
503 Archive::symbol_iterator es = symbol_end();
505 for (; bs != es; ++bs) {
506 StringRef SymName = bs->getName();
507 if (SymName == name) {
508 ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
509 // FIXME: Should we really eat the error?
510 if (ResultOrErr.getError())
512 return ResultOrErr.get();
// Reports whether a symbol table member was found: SymbolTable is initialised
// to child_end() by the constructor, so any other value means one is present.
518 bool Archive::hasSymbolTable() const {
519 return SymbolTable != child_end();