obj2yaml, yaml2obj: Add support for COFF executables
[oota-llvm.git] / tools / yaml2obj / yaml2coff.cpp
1 //===- yaml2coff - Convert YAML to a COFF object file ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief The COFF component of yaml2obj.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "yaml2obj.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/COFFYAML.h"
22 #include "llvm/Object/COFF.h"
23 #include "llvm/Support/Endian.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/SourceMgr.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <vector>
28
29 using namespace llvm;
30
31 /// This parses a yaml stream that represents a COFF object file.
32 /// See docs/yaml2obj for the yaml scheema.
33 struct COFFParser {
34   COFFParser(COFFYAML::Object &Obj)
35       : Obj(Obj), SectionTableStart(0), SectionTableSize(0) {
36     // A COFF string table always starts with a 4 byte size field. Offsets into
37     // it include this size, so allocate it now.
38     StringTable.append(4, char(0));
39   }
40
41   bool useBigObj() const {
42     return static_cast<int32_t>(Obj.Sections.size()) >
43            COFF::MaxNumberOfSections16;
44   }
45
46   bool isPE() const { return Obj.OptionalHeader.hasValue(); }
47   bool is64Bit() const {
48     return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64;
49   }
50
51   uint32_t getFileAlignment() const {
52     return Obj.OptionalHeader->Header.FileAlignment;
53   }
54
55   unsigned getHeaderSize() const {
56     return useBigObj() ? COFF::Header32Size : COFF::Header16Size;
57   }
58
59   unsigned getSymbolSize() const {
60     return useBigObj() ? COFF::Symbol32Size : COFF::Symbol16Size;
61   }
62
63   bool parseSections() {
64     for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
65            e = Obj.Sections.end(); i != e; ++i) {
66       COFFYAML::Section &Sec = *i;
67
68       // If the name is less than 8 bytes, store it in place, otherwise
69       // store it in the string table.
70       StringRef Name = Sec.Name;
71
72       if (Name.size() <= COFF::NameSize) {
73         std::copy(Name.begin(), Name.end(), Sec.Header.Name);
74       } else {
75         // Add string to the string table and format the index for output.
76         unsigned Index = getStringIndex(Name);
77         std::string str = utostr(Index);
78         if (str.size() > 7) {
79           errs() << "String table got too large";
80           return false;
81         }
82         Sec.Header.Name[0] = '/';
83         std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
84       }
85
86       Sec.Header.Characteristics |= (Log2_32(Sec.Alignment) + 1) << 20;
87     }
88     return true;
89   }
90
91   bool parseSymbols() {
92     for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
93            e = Obj.Symbols.end(); i != e; ++i) {
94       COFFYAML::Symbol &Sym = *i;
95
96       // If the name is less than 8 bytes, store it in place, otherwise
97       // store it in the string table.
98       StringRef Name = Sym.Name;
99       if (Name.size() <= COFF::NameSize) {
100         std::copy(Name.begin(), Name.end(), Sym.Header.Name);
101       } else {
102         // Add string to the string table and format the index for output.
103         unsigned Index = getStringIndex(Name);
104         *reinterpret_cast<support::aligned_ulittle32_t*>(
105             Sym.Header.Name + 4) = Index;
106       }
107
108       Sym.Header.Type = Sym.SimpleType;
109       Sym.Header.Type |= Sym.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
110     }
111     return true;
112   }
113
114   bool parse() {
115     if (!parseSections())
116       return false;
117     if (!parseSymbols())
118       return false;
119     return true;
120   }
121
122   unsigned getStringIndex(StringRef Str) {
123     StringMap<unsigned>::iterator i = StringTableMap.find(Str);
124     if (i == StringTableMap.end()) {
125       unsigned Index = StringTable.size();
126       StringTable.append(Str.begin(), Str.end());
127       StringTable.push_back(0);
128       StringTableMap[Str] = Index;
129       return Index;
130     }
131     return i->second;
132   }
133
134   COFFYAML::Object &Obj;
135
136   StringMap<unsigned> StringTableMap;
137   std::string StringTable;
138   uint32_t SectionTableStart;
139   uint32_t SectionTableSize;
140 };
141
142 // Take a CP and assign addresses and sizes to everything. Returns false if the
143 // layout is not valid to do.
144 static bool layoutOptionalHeader(COFFParser &CP) {
145   if (!CP.isPE())
146     return true;
147   CP.Obj.Header.SizeOfOptionalHeader =
148       (CP.is64Bit() ? sizeof(object::pe32plus_header)
149                     : sizeof(object::pe32_header)) +
150       (sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1));
151   return true;
152 }
153
154 // Take a CP and assign addresses and sizes to everything. Returns false if the
155 // layout is not valid to do.
156 static bool layoutCOFF(COFFParser &CP) {
157   // The section table starts immediately after the header, including the
158   // optional header.
159   CP.SectionTableStart =
160       CP.getHeaderSize() + CP.Obj.Header.SizeOfOptionalHeader;
161   CP.SectionTableSize = COFF::SectionSize * CP.Obj.Sections.size();
162
163   uint32_t CurrentSectionDataOffset =
164       CP.SectionTableStart + CP.SectionTableSize;
165
166   // Assign each section data address consecutively.
167   for (COFFYAML::Section &S : CP.Obj.Sections) {
168     if (S.SectionData.binary_size() > 0) {
169       CurrentSectionDataOffset = RoundUpToAlignment(
170           CurrentSectionDataOffset, CP.isPE() ? CP.getFileAlignment() : 4);
171       S.Header.SizeOfRawData = S.SectionData.binary_size();
172       if (CP.isPE())
173         S.Header.SizeOfRawData =
174             RoundUpToAlignment(S.Header.SizeOfRawData, CP.getFileAlignment());
175       S.Header.PointerToRawData = CurrentSectionDataOffset;
176       CurrentSectionDataOffset += S.Header.SizeOfRawData;
177       if (!S.Relocations.empty()) {
178         S.Header.PointerToRelocations = CurrentSectionDataOffset;
179         S.Header.NumberOfRelocations = S.Relocations.size();
180         CurrentSectionDataOffset +=
181             S.Header.NumberOfRelocations * COFF::RelocationSize;
182       }
183     } else {
184       S.Header.SizeOfRawData = 0;
185       S.Header.PointerToRawData = 0;
186     }
187   }
188
189   uint32_t SymbolTableStart = CurrentSectionDataOffset;
190
191   // Calculate number of symbols.
192   uint32_t NumberOfSymbols = 0;
193   for (std::vector<COFFYAML::Symbol>::iterator i = CP.Obj.Symbols.begin(),
194                                                e = CP.Obj.Symbols.end();
195                                                i != e; ++i) {
196     uint32_t NumberOfAuxSymbols = 0;
197     if (i->FunctionDefinition)
198       NumberOfAuxSymbols += 1;
199     if (i->bfAndefSymbol)
200       NumberOfAuxSymbols += 1;
201     if (i->WeakExternal)
202       NumberOfAuxSymbols += 1;
203     if (!i->File.empty())
204       NumberOfAuxSymbols +=
205           (i->File.size() + CP.getSymbolSize() - 1) / CP.getSymbolSize();
206     if (i->SectionDefinition)
207       NumberOfAuxSymbols += 1;
208     if (i->CLRToken)
209       NumberOfAuxSymbols += 1;
210     i->Header.NumberOfAuxSymbols = NumberOfAuxSymbols;
211     NumberOfSymbols += 1 + NumberOfAuxSymbols;
212   }
213
214   // Store all the allocated start addresses in the header.
215   CP.Obj.Header.NumberOfSections = CP.Obj.Sections.size();
216   CP.Obj.Header.NumberOfSymbols = NumberOfSymbols;
217   if (NumberOfSymbols > 0 || CP.StringTable.size() > 4)
218     CP.Obj.Header.PointerToSymbolTable = SymbolTableStart;
219   else
220     CP.Obj.Header.PointerToSymbolTable = 0;
221
222   *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
223     = CP.StringTable.size();
224
225   return true;
226 }
227
/// Holds a value that is to be streamed in little-endian binary form.
template <typename value_type>
struct binary_le_impl {
  value_type Value;

  binary_le_impl(value_type Val) : Value(Val) {}
};
233
234 template <typename value_type>
235 raw_ostream &operator <<( raw_ostream &OS
236                         , const binary_le_impl<value_type> &BLE) {
237   char Buffer[sizeof(BLE.Value)];
238   support::endian::write<value_type, support::little, support::unaligned>(
239     Buffer, BLE.Value);
240   OS.write(Buffer, sizeof(BLE.Value));
241   return OS;
242 }
243
244 template <typename value_type>
245 binary_le_impl<value_type> binary_le(value_type V) {
246   return binary_le_impl<value_type>(V);
247 }
248
/// Stateless tag type; the byte count to emit is carried in the template
/// argument.
template <size_t NumBytes>
struct zeros_impl {
  zeros_impl() = default;
};
253
254 template <size_t NumBytes>
255 raw_ostream &operator<<(raw_ostream &OS, const zeros_impl<NumBytes> &) {
256   char Buffer[NumBytes];
257   memset(Buffer, 0, sizeof(Buffer));
258   OS.write(Buffer, sizeof(Buffer));
259   return OS;
260 }
261
262 template <typename T>
263 zeros_impl<sizeof(T)> zeros(const T &) {
264   return zeros_impl<sizeof(T)>();
265 }
266
/// Holds a run-time count of zero bytes to be streamed.
struct num_zeros_impl {
  size_t N;

  num_zeros_impl(size_t Count) : N(Count) {}
};
271
272 raw_ostream &operator<<(raw_ostream &OS, const num_zeros_impl &NZI) {
273   for (size_t I = 0; I != NZI.N; ++I)
274     OS.write(0);
275   return OS;
276 }
277
278 num_zeros_impl num_zeros(size_t N) {
279   num_zeros_impl NZI(N);
280   return NZI;
281 }
282
283 template <typename T>
284 static void initializeOptionalHeader(COFFParser &CP, uint16_t Magic, T Header) {
285   memset(Header, 0, sizeof(*Header));
286   Header->Magic = Magic;
287   Header->SectionAlignment = CP.Obj.OptionalHeader->Header.SectionAlignment;
288   uint32_t SizeOfCode = 0, SizeOfInitializedData = 0,
289            SizeOfUninitializedData = 0;
290   uint32_t SizeOfHeaders = RoundUpToAlignment(
291       CP.SectionTableStart + CP.SectionTableSize, Header->SectionAlignment);
292   uint32_t SizeOfImage = SizeOfHeaders;
293   for (const COFFYAML::Section &S : CP.Obj.Sections) {
294     if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_CODE)
295       SizeOfCode += S.Header.SizeOfRawData;
296     if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
297       SizeOfInitializedData += S.Header.SizeOfRawData;
298     if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
299       SizeOfUninitializedData += S.Header.SizeOfRawData;
300     if (S.Name.equals(".text"))
301       Header->BaseOfCode = S.Header.VirtualAddress;          // RVA
302     if (S.Header.VirtualAddress)
303       SizeOfImage +=
304           RoundUpToAlignment(S.Header.VirtualSize, Header->SectionAlignment);
305   }
306   Header->SizeOfCode = SizeOfCode;
307   Header->SizeOfInitializedData = SizeOfInitializedData;
308   Header->SizeOfUninitializedData = SizeOfUninitializedData;
309   Header->AddressOfEntryPoint =
310       CP.Obj.OptionalHeader->Header.AddressOfEntryPoint; // RVA
311   Header->ImageBase = CP.Obj.OptionalHeader->Header.ImageBase;
312   Header->FileAlignment = CP.Obj.OptionalHeader->Header.FileAlignment;
313   Header->MajorOperatingSystemVersion =
314       CP.Obj.OptionalHeader->Header.MajorOperatingSystemVersion;
315   Header->MinorOperatingSystemVersion =
316       CP.Obj.OptionalHeader->Header.MinorOperatingSystemVersion;
317   Header->MajorImageVersion =
318       CP.Obj.OptionalHeader->Header.MajorImageVersion;
319   Header->MinorImageVersion =
320       CP.Obj.OptionalHeader->Header.MinorImageVersion;
321   Header->MajorSubsystemVersion =
322       CP.Obj.OptionalHeader->Header.MajorSubsystemVersion;
323   Header->MinorSubsystemVersion =
324       CP.Obj.OptionalHeader->Header.MinorSubsystemVersion;
325   Header->SizeOfImage = SizeOfImage;
326   Header->SizeOfHeaders = SizeOfHeaders;
327   Header->Subsystem = CP.Obj.OptionalHeader->Header.Subsystem;
328   Header->DLLCharacteristics = CP.Obj.OptionalHeader->Header.DLLCharacteristics;
329   Header->SizeOfStackReserve = CP.Obj.OptionalHeader->Header.SizeOfStackReserve;
330   Header->SizeOfStackCommit = CP.Obj.OptionalHeader->Header.SizeOfStackCommit;
331   Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve;
332   Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit;
333   Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1;
334 }
335
336 static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
337   if (CP.isPE()) {
338     // PE files start with a DOS stub.
339     object::dos_header DH;
340     memset(&DH, 0, sizeof(DH));
341
342     // DOS EXEs start with "MZ" magic.
343     DH.Magic[0] = 'M';
344     DH.Magic[1] = 'Z';
345     // Initializing the AddressOfRelocationTable is strictly optional but
346     // mollifies certain tools which expect it to have a value greater than
347     // 0x40.
348     DH.AddressOfRelocationTable = sizeof(DH);
349     // This is the address of the PE signature.
350     DH.AddressOfNewExeHeader = 128;
351
352     // Write out our DOS stub.
353     OS.write(reinterpret_cast<char *>(&DH), sizeof(DH));
354     // Write padding until we reach the position of where our PE signature
355     // should live.
356     OS << num_zeros(DH.AddressOfNewExeHeader - sizeof(DH));
357     // Write out the PE signature.
358     OS.write(COFF::PEMagic, sizeof(COFF::PEMagic));
359   }
360   if (CP.useBigObj()) {
361     OS << binary_le(static_cast<uint16_t>(COFF::IMAGE_FILE_MACHINE_UNKNOWN))
362        << binary_le(static_cast<uint16_t>(0xffff))
363        << binary_le(static_cast<uint16_t>(COFF::BigObjHeader::MinBigObjectVersion))
364        << binary_le(CP.Obj.Header.Machine)
365        << binary_le(CP.Obj.Header.TimeDateStamp);
366     OS.write(COFF::BigObjMagic, sizeof(COFF::BigObjMagic));
367     OS << zeros(uint32_t(0))
368        << zeros(uint32_t(0))
369        << zeros(uint32_t(0))
370        << zeros(uint32_t(0))
371        << binary_le(CP.Obj.Header.NumberOfSections)
372        << binary_le(CP.Obj.Header.PointerToSymbolTable)
373        << binary_le(CP.Obj.Header.NumberOfSymbols);
374   } else {
375     OS << binary_le(CP.Obj.Header.Machine)
376        << binary_le(static_cast<int16_t>(CP.Obj.Header.NumberOfSections))
377        << binary_le(CP.Obj.Header.TimeDateStamp)
378        << binary_le(CP.Obj.Header.PointerToSymbolTable)
379        << binary_le(CP.Obj.Header.NumberOfSymbols)
380        << binary_le(CP.Obj.Header.SizeOfOptionalHeader)
381        << binary_le(CP.Obj.Header.Characteristics);
382   }
383   if (CP.isPE()) {
384     if (CP.is64Bit()) {
385       object::pe32plus_header PEH;
386       initializeOptionalHeader(CP, COFF::PE32Header::PE32_PLUS, &PEH);
387       OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
388     } else {
389       object::pe32_header PEH;
390       initializeOptionalHeader(CP, COFF::PE32Header::PE32, &PEH);
391       OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
392     }
393     for (const Optional<COFF::DataDirectory> &DD :
394          CP.Obj.OptionalHeader->DataDirectories) {
395       if (!DD.hasValue()) {
396         OS << zeros(uint32_t(0));
397         OS << zeros(uint32_t(0));
398       } else {
399         OS << binary_le(DD->RelativeVirtualAddress);
400         OS << binary_le(DD->Size);
401       }
402     }
403     OS << zeros(uint32_t(0));
404     OS << zeros(uint32_t(0));
405   }
406
407   // Output section table.
408   for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
409                                                 e = CP.Obj.Sections.end();
410                                                 i != e; ++i) {
411     OS.write(i->Header.Name, COFF::NameSize);
412     OS << binary_le(i->Header.VirtualSize)
413        << binary_le(i->Header.VirtualAddress)
414        << binary_le(i->Header.SizeOfRawData)
415        << binary_le(i->Header.PointerToRawData)
416        << binary_le(i->Header.PointerToRelocations)
417        << binary_le(i->Header.PointerToLineNumbers)
418        << binary_le(i->Header.NumberOfRelocations)
419        << binary_le(i->Header.NumberOfLineNumbers)
420        << binary_le(i->Header.Characteristics);
421   }
422
423   unsigned CurSymbol = 0;
424   StringMap<unsigned> SymbolTableIndexMap;
425   for (std::vector<COFFYAML::Symbol>::iterator I = CP.Obj.Symbols.begin(),
426                                                E = CP.Obj.Symbols.end();
427        I != E; ++I) {
428     SymbolTableIndexMap[I->Name] = CurSymbol;
429     CurSymbol += 1 + I->Header.NumberOfAuxSymbols;
430   }
431
432   // Output section data.
433   for (const COFFYAML::Section &S : CP.Obj.Sections) {
434     if (!S.Header.SizeOfRawData)
435       continue;
436     OS << num_zeros(S.Header.PointerToRawData - OS.tell());
437     S.SectionData.writeAsBinary(OS);
438     OS << num_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size());
439     for (const COFFYAML::Relocation &R : S.Relocations) {
440       uint32_t SymbolTableIndex = SymbolTableIndexMap[R.SymbolName];
441       OS << binary_le(R.VirtualAddress)
442          << binary_le(SymbolTableIndex)
443          << binary_le(R.Type);
444     }
445   }
446
447   // Output symbol table.
448
449   for (std::vector<COFFYAML::Symbol>::const_iterator i = CP.Obj.Symbols.begin(),
450                                                      e = CP.Obj.Symbols.end();
451                                                      i != e; ++i) {
452     OS.write(i->Header.Name, COFF::NameSize);
453     OS << binary_le(i->Header.Value);
454     if (CP.useBigObj())
455        OS << binary_le(i->Header.SectionNumber);
456     else
457        OS << binary_le(static_cast<int16_t>(i->Header.SectionNumber));
458     OS << binary_le(i->Header.Type)
459        << binary_le(i->Header.StorageClass)
460        << binary_le(i->Header.NumberOfAuxSymbols);
461
462     if (i->FunctionDefinition)
463       OS << binary_le(i->FunctionDefinition->TagIndex)
464          << binary_le(i->FunctionDefinition->TotalSize)
465          << binary_le(i->FunctionDefinition->PointerToLinenumber)
466          << binary_le(i->FunctionDefinition->PointerToNextFunction)
467          << zeros(i->FunctionDefinition->unused)
468          << num_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
469     if (i->bfAndefSymbol)
470       OS << zeros(i->bfAndefSymbol->unused1)
471          << binary_le(i->bfAndefSymbol->Linenumber)
472          << zeros(i->bfAndefSymbol->unused2)
473          << binary_le(i->bfAndefSymbol->PointerToNextFunction)
474          << zeros(i->bfAndefSymbol->unused3)
475          << num_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
476     if (i->WeakExternal)
477       OS << binary_le(i->WeakExternal->TagIndex)
478          << binary_le(i->WeakExternal->Characteristics)
479          << zeros(i->WeakExternal->unused)
480          << num_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
481     if (!i->File.empty()) {
482       unsigned SymbolSize = CP.getSymbolSize();
483       uint32_t NumberOfAuxRecords =
484           (i->File.size() + SymbolSize - 1) / SymbolSize;
485       uint32_t NumberOfAuxBytes = NumberOfAuxRecords * SymbolSize;
486       uint32_t NumZeros = NumberOfAuxBytes - i->File.size();
487       OS.write(i->File.data(), i->File.size());
488       OS << num_zeros(NumZeros);
489     }
490     if (i->SectionDefinition)
491       OS << binary_le(i->SectionDefinition->Length)
492          << binary_le(i->SectionDefinition->NumberOfRelocations)
493          << binary_le(i->SectionDefinition->NumberOfLinenumbers)
494          << binary_le(i->SectionDefinition->CheckSum)
495          << binary_le(static_cast<int16_t>(i->SectionDefinition->Number))
496          << binary_le(i->SectionDefinition->Selection)
497          << zeros(i->SectionDefinition->unused)
498          << binary_le(static_cast<int16_t>(i->SectionDefinition->Number >> 16))
499          << num_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
500     if (i->CLRToken)
501       OS << binary_le(i->CLRToken->AuxType)
502          << zeros(i->CLRToken->unused1)
503          << binary_le(i->CLRToken->SymbolTableIndex)
504          << zeros(i->CLRToken->unused2)
505          << num_zeros(CP.getSymbolSize() - COFF::Symbol16Size);
506   }
507
508   // Output string table.
509   if (CP.Obj.Header.PointerToSymbolTable)
510     OS.write(&CP.StringTable[0], CP.StringTable.size());
511   return true;
512 }
513
514 int yaml2coff(yaml::Input &YIn, raw_ostream &Out) {
515   COFFYAML::Object Doc;
516   YIn >> Doc;
517   if (YIn.error()) {
518     errs() << "yaml2obj: Failed to parse YAML file!\n";
519     return 1;
520   }
521
522   COFFParser CP(Doc);
523   if (!CP.parse()) {
524     errs() << "yaml2obj: Failed to parse YAML file!\n";
525     return 1;
526   }
527
528   if (!layoutOptionalHeader(CP)) {
529     errs() << "yaml2obj: Failed to layout optional header for COFF file!\n";
530     return 1;
531   }
532   if (!layoutCOFF(CP)) {
533     errs() << "yaml2obj: Failed to layout COFF file!\n";
534     return 1;
535   }
536   if (!writeCOFF(CP, Out)) {
537     errs() << "yaml2obj: Failed to write COFF file!\n";
538     return 1;
539   }
540   return 0;
541 }