1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
// Kinds of token produced by the Lexer.
enum Kind {
  // Default kind of a default-constructed Token.
  Unknown,
  // End of input (empty buffer or a NUL character).
  Eof,
  // A quoted string, or any word that is not one of the keywords below.
  Identifier,

  // Punctuation.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords; each is matched against its upper-case spelling.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
50 
51 struct Token {
52   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
53   Kind K;
54   StringRef Value;
55 };
56 
57 static bool isDecorated(StringRef Sym, bool MingwDef) {
58   // In def files, the symbols can either be listed decorated or undecorated.
59   //
60   // - For cdecl symbols, only the undecorated form is allowed.
61   // - For fastcall and vectorcall symbols, both fully decorated or
62   //   undecorated forms can be present.
63   // - For stdcall symbols in non-MinGW environments, the decorated form is
64   //   fully decorated with leading underscore and trailing stack argument
65   //   size - like "_Func@0".
66   // - In MinGW def files, a decorated stdcall symbol does not include the
67   //   leading underscore though, like "Func@0".
68 
69   // This function controls whether a leading underscore should be added to
70   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73   // as decorated, i.e. don't add any more leading underscores.
74   // We can't check for a leading underscore here, since function names
75   // themselves can start with an underscore, while a second one still needs
76   // to be added.
77   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
78          (!MingwDef && Sym.contains('@'));
79 }
80 
81 class Lexer {
82 public:
83   Lexer(StringRef S) : Buf(S) {}
84 
85   Token lex() {
86     Buf = Buf.trim();
87     if (Buf.empty())
88       return Token(Eof);
89 
90     switch (Buf[0]) {
91     case '\0':
92       return Token(Eof);
93     case ';': {
94       size_t End = Buf.find('\n');
95       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96       return lex();
97     }
98     case '=':
99       Buf = Buf.drop_front();
100       if (Buf.startswith("=")) {
101         Buf = Buf.drop_front();
102         return Token(EqualEqual, "==");
103       }
104       return Token(Equal, "=");
105     case ',':
106       Buf = Buf.drop_front();
107       return Token(Comma, ",");
108     case '"': {
109       StringRef S;
110       std::tie(S, Buf) = Buf.substr(1).split('"');
111       return Token(Identifier, S);
112     }
113     default: {
114       size_t End = Buf.find_first_of("=,;\r\n \t\v");
115       StringRef Word = Buf.substr(0, End);
116       Kind K = llvm::StringSwitch<Kind>(Word)
117                    .Case("BASE", KwBase)
118                    .Case("CONSTANT", KwConstant)
119                    .Case("DATA", KwData)
120                    .Case("EXPORTS", KwExports)
121                    .Case("HEAPSIZE", KwHeapsize)
122                    .Case("LIBRARY", KwLibrary)
123                    .Case("NAME", KwName)
124                    .Case("NONAME", KwNoname)
125                    .Case("PRIVATE", KwPrivate)
126                    .Case("STACKSIZE", KwStacksize)
127                    .Case("VERSION", KwVersion)
128                    .Default(Identifier);
129       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130       return Token(K, Word);
131     }
132     }
133   }
134 
135 private:
136   StringRef Buf;
137 };
138 
139 class Parser {
140 public:
141   explicit Parser(StringRef S, MachineTypes M, bool B)
142       : Lex(S), Machine(M), MingwDef(B) {}
143 
144   Expected<COFFModuleDefinition> parse() {
145     do {
146       if (Error Err = parseOne())
147         return std::move(Err);
148     } while (Tok.K != Eof);
149     return Info;
150   }
151 
152 private:
153   void read() {
154     if (Stack.empty()) {
155       Tok = Lex.lex();
156       return;
157     }
158     Tok = Stack.back();
159     Stack.pop_back();
160   }
161 
162   Error readAsInt(uint64_t *I) {
163     read();
164     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
165       return createError("integer expected");
166     return Error::success();
167   }
168 
169   Error expect(Kind Expected, StringRef Msg) {
170     read();
171     if (Tok.K != Expected)
172       return createError(Msg);
173     return Error::success();
174   }
175 
176   void unget() { Stack.push_back(Tok); }
177 
178   Error parseOne() {
179     read();
180     switch (Tok.K) {
181     case Eof:
182       return Error::success();
183     case KwExports:
184       for (;;) {
185         read();
186         if (Tok.K != Identifier) {
187           unget();
188           return Error::success();
189         }
190         if (Error Err = parseExport())
191           return Err;
192       }
193     case KwHeapsize:
194       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
195     case KwStacksize:
196       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
197     case KwLibrary:
198     case KwName: {
199       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
200       std::string Name;
201       if (Error Err = parseName(&Name, &Info.ImageBase))
202         return Err;
203 
204       Info.ImportName = Name;
205 
206       // Set the output file, but don't override /out if it was already passed.
207       if (Info.OutputFile.empty()) {
208         Info.OutputFile = Name;
209         // Append the appropriate file extension if not already present.
210         if (!sys::path::has_extension(Name))
211           Info.OutputFile += IsDll ? ".dll" : ".exe";
212       }
213 
214       return Error::success();
215     }
216     case KwVersion:
217       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
218     default:
219       return createError("unknown directive: " + Tok.Value);
220     }
221   }
222 
223   Error parseExport() {
224     COFFShortExport E;
225     E.Name = std::string(Tok.Value);
226     read();
227     if (Tok.K == Equal) {
228       read();
229       if (Tok.K != Identifier)
230         return createError("identifier expected, but got " + Tok.Value);
231       E.ExtName = E.Name;
232       E.Name = std::string(Tok.Value);
233     } else {
234       unget();
235     }
236 
237     if (Machine == IMAGE_FILE_MACHINE_I386) {
238       if (!isDecorated(E.Name, MingwDef))
239         E.Name = (std::string("_").append(E.Name));
240       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
241         E.ExtName = (std::string("_").append(E.ExtName));
242     }
243 
244     for (;;) {
245       read();
246       if (Tok.K == Identifier && Tok.Value[0] == '@') {
247         if (Tok.Value == "@") {
248           // "foo @ 10"
249           read();
250           Tok.Value.getAsInteger(10, E.Ordinal);
251         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
252           // "foo \n @bar" - Not an ordinal modifier at all, but the next
253           // export (fastcall decorated) - complete the current one.
254           unget();
255           Info.Exports.push_back(E);
256           return Error::success();
257         }
258         // "foo @10"
259         read();
260         if (Tok.K == KwNoname) {
261           E.Noname = true;
262         } else {
263           unget();
264         }
265         continue;
266       }
267       if (Tok.K == KwData) {
268         E.Data = true;
269         continue;
270       }
271       if (Tok.K == KwConstant) {
272         E.Constant = true;
273         continue;
274       }
275       if (Tok.K == KwPrivate) {
276         E.Private = true;
277         continue;
278       }
279       if (Tok.K == EqualEqual) {
280         read();
281         E.AliasTarget = std::string(Tok.Value);
282         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
283           E.AliasTarget = std::string("_").append(E.AliasTarget);
284         continue;
285       }
286       unget();
287       Info.Exports.push_back(E);
288       return Error::success();
289     }
290   }
291 
292   // HEAPSIZE/STACKSIZE reserve[,commit]
293   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
294     if (Error Err = readAsInt(Reserve))
295       return Err;
296     read();
297     if (Tok.K != Comma) {
298       unget();
299       Commit = nullptr;
300       return Error::success();
301     }
302     if (Error Err = readAsInt(Commit))
303       return Err;
304     return Error::success();
305   }
306 
307   // NAME outputPath [BASE=address]
308   Error parseName(std::string *Out, uint64_t *Baseaddr) {
309     read();
310     if (Tok.K == Identifier) {
311       *Out = std::string(Tok.Value);
312     } else {
313       *Out = "";
314       unget();
315       return Error::success();
316     }
317     read();
318     if (Tok.K == KwBase) {
319       if (Error Err = expect(Equal, "'=' expected"))
320         return Err;
321       if (Error Err = readAsInt(Baseaddr))
322         return Err;
323     } else {
324       unget();
325       *Baseaddr = 0;
326     }
327     return Error::success();
328   }
329 
330   // VERSION major[.minor]
331   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
332     read();
333     if (Tok.K != Identifier)
334       return createError("identifier expected, but got " + Tok.Value);
335     StringRef V1, V2;
336     std::tie(V1, V2) = Tok.Value.split('.');
337     if (V1.getAsInteger(10, *Major))
338       return createError("integer expected, but got " + Tok.Value);
339     if (V2.empty())
340       *Minor = 0;
341     else if (V2.getAsInteger(10, *Minor))
342       return createError("integer expected, but got " + Tok.Value);
343     return Error::success();
344   }
345 
346   Lexer Lex;
347   Token Tok;
348   std::vector<Token> Stack;
349   MachineTypes Machine;
350   COFFModuleDefinition Info;
351   bool MingwDef;
352 };
353 
354 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
355                                                          MachineTypes Machine,
356                                                          bool MingwDef) {
357   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
358 }
359 
360 } // namespace object
361 } // namespace llvm
362