1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFFImportFile.h"
21 #include "llvm/Object/Error.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace llvm::COFF;
26 using namespace llvm;
27 
28 namespace llvm {
29 namespace object {
30 
// Kinds of tokens the Lexer produces when scanning a module-definition file.
// The Kw* enumerators correspond to the keywords the Lexer recognizes by
// their exact (upper-case) spelling.
enum Kind {
  Unknown,     // Kind of a default-constructed Token.
  Eof,         // End of the input (or a NUL character) was reached.
  Identifier,  // A bare word that is not a keyword, or a quoted string.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
50 
51 struct Token {
Tokenllvm::object::Token52   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
53   Kind K;
54   StringRef Value;
55 };
56 
isDecorated(StringRef Sym,bool MingwDef)57 static bool isDecorated(StringRef Sym, bool MingwDef) {
58   // In def files, the symbols can either be listed decorated or undecorated.
59   //
60   // - For cdecl symbols, only the undecorated form is allowed.
61   // - For fastcall and vectorcall symbols, both fully decorated or
62   //   undecorated forms can be present.
63   // - For stdcall symbols in non-MinGW environments, the decorated form is
64   //   fully decorated with leading underscore and trailing stack argument
65   //   size - like "_Func@0".
66   // - In MinGW def files, a decorated stdcall symbol does not include the
67   //   leading underscore though, like "Func@0".
68 
69   // This function controls whether a leading underscore should be added to
70   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73   // as decorated, i.e. don't add any more leading underscores.
74   // We can't check for a leading underscore here, since function names
75   // themselves can start with an underscore, while a second one still needs
76   // to be added.
77   return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
78          (!MingwDef && Sym.contains('@'));
79 }
80 
81 class Lexer {
82 public:
Lexer(StringRef S)83   Lexer(StringRef S) : Buf(S) {}
84 
lex()85   Token lex() {
86     Buf = Buf.trim();
87     if (Buf.empty())
88       return Token(Eof);
89 
90     switch (Buf[0]) {
91     case '\0':
92       return Token(Eof);
93     case ';': {
94       size_t End = Buf.find('\n');
95       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96       return lex();
97     }
98     case '=':
99       Buf = Buf.drop_front();
100       if (Buf.starts_with("=")) {
101         Buf = Buf.drop_front();
102         return Token(EqualEqual, "==");
103       }
104       return Token(Equal, "=");
105     case ',':
106       Buf = Buf.drop_front();
107       return Token(Comma, ",");
108     case '"': {
109       StringRef S;
110       std::tie(S, Buf) = Buf.substr(1).split('"');
111       return Token(Identifier, S);
112     }
113     default: {
114       size_t End = Buf.find_first_of("=,;\r\n \t\v");
115       StringRef Word = Buf.substr(0, End);
116       Kind K = llvm::StringSwitch<Kind>(Word)
117                    .Case("BASE", KwBase)
118                    .Case("CONSTANT", KwConstant)
119                    .Case("DATA", KwData)
120                    .Case("EXPORTS", KwExports)
121                    .Case("HEAPSIZE", KwHeapsize)
122                    .Case("LIBRARY", KwLibrary)
123                    .Case("NAME", KwName)
124                    .Case("NONAME", KwNoname)
125                    .Case("PRIVATE", KwPrivate)
126                    .Case("STACKSIZE", KwStacksize)
127                    .Case("VERSION", KwVersion)
128                    .Default(Identifier);
129       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130       return Token(K, Word);
131     }
132     }
133   }
134 
135 private:
136   StringRef Buf;
137 };
138 
139 class Parser {
140 public:
Parser(StringRef S,MachineTypes M,bool B,bool AU)141   explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142       : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143     if (Machine != IMAGE_FILE_MACHINE_I386)
144       AddUnderscores = false;
145   }
146 
parse()147   Expected<COFFModuleDefinition> parse() {
148     do {
149       if (Error Err = parseOne())
150         return std::move(Err);
151     } while (Tok.K != Eof);
152     return Info;
153   }
154 
155 private:
read()156   void read() {
157     if (Stack.empty()) {
158       Tok = Lex.lex();
159       return;
160     }
161     Tok = Stack.back();
162     Stack.pop_back();
163   }
164 
readAsInt(uint64_t * I)165   Error readAsInt(uint64_t *I) {
166     read();
167     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168       return createError("integer expected");
169     return Error::success();
170   }
171 
expect(Kind Expected,StringRef Msg)172   Error expect(Kind Expected, StringRef Msg) {
173     read();
174     if (Tok.K != Expected)
175       return createError(Msg);
176     return Error::success();
177   }
178 
unget()179   void unget() { Stack.push_back(Tok); }
180 
parseOne()181   Error parseOne() {
182     read();
183     switch (Tok.K) {
184     case Eof:
185       return Error::success();
186     case KwExports:
187       for (;;) {
188         read();
189         if (Tok.K != Identifier) {
190           unget();
191           return Error::success();
192         }
193         if (Error Err = parseExport())
194           return Err;
195       }
196     case KwHeapsize:
197       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198     case KwStacksize:
199       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200     case KwLibrary:
201     case KwName: {
202       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203       std::string Name;
204       if (Error Err = parseName(&Name, &Info.ImageBase))
205         return Err;
206 
207       Info.ImportName = Name;
208 
209       // Set the output file, but don't override /out if it was already passed.
210       if (Info.OutputFile.empty()) {
211         Info.OutputFile = Name;
212         // Append the appropriate file extension if not already present.
213         if (!sys::path::has_extension(Name))
214           Info.OutputFile += IsDll ? ".dll" : ".exe";
215       }
216 
217       return Error::success();
218     }
219     case KwVersion:
220       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221     default:
222       return createError("unknown directive: " + Tok.Value);
223     }
224   }
225 
parseExport()226   Error parseExport() {
227     COFFShortExport E;
228     E.Name = std::string(Tok.Value);
229     read();
230     if (Tok.K == Equal) {
231       read();
232       if (Tok.K != Identifier)
233         return createError("identifier expected, but got " + Tok.Value);
234       E.ExtName = E.Name;
235       E.Name = std::string(Tok.Value);
236     } else {
237       unget();
238     }
239 
240     if (AddUnderscores) {
241       if (!isDecorated(E.Name, MingwDef))
242         E.Name = (std::string("_").append(E.Name));
243       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
244         E.ExtName = (std::string("_").append(E.ExtName));
245     }
246 
247     for (;;) {
248       read();
249       if (Tok.K == Identifier && Tok.Value[0] == '@') {
250         if (Tok.Value == "@") {
251           // "foo @ 10"
252           read();
253           Tok.Value.getAsInteger(10, E.Ordinal);
254         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
255           // "foo \n @bar" - Not an ordinal modifier at all, but the next
256           // export (fastcall decorated) - complete the current one.
257           unget();
258           Info.Exports.push_back(E);
259           return Error::success();
260         }
261         // "foo @10"
262         read();
263         if (Tok.K == KwNoname) {
264           E.Noname = true;
265         } else {
266           unget();
267         }
268         continue;
269       }
270       if (Tok.K == KwData) {
271         E.Data = true;
272         continue;
273       }
274       if (Tok.K == KwConstant) {
275         E.Constant = true;
276         continue;
277       }
278       if (Tok.K == KwPrivate) {
279         E.Private = true;
280         continue;
281       }
282       if (Tok.K == EqualEqual) {
283         read();
284         E.AliasTarget = std::string(Tok.Value);
285         if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
286           E.AliasTarget = std::string("_").append(E.AliasTarget);
287         continue;
288       }
289       unget();
290       Info.Exports.push_back(E);
291       return Error::success();
292     }
293   }
294 
295   // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)296   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
297     if (Error Err = readAsInt(Reserve))
298       return Err;
299     read();
300     if (Tok.K != Comma) {
301       unget();
302       Commit = nullptr;
303       return Error::success();
304     }
305     if (Error Err = readAsInt(Commit))
306       return Err;
307     return Error::success();
308   }
309 
310   // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)311   Error parseName(std::string *Out, uint64_t *Baseaddr) {
312     read();
313     if (Tok.K == Identifier) {
314       *Out = std::string(Tok.Value);
315     } else {
316       *Out = "";
317       unget();
318       return Error::success();
319     }
320     read();
321     if (Tok.K == KwBase) {
322       if (Error Err = expect(Equal, "'=' expected"))
323         return Err;
324       if (Error Err = readAsInt(Baseaddr))
325         return Err;
326     } else {
327       unget();
328       *Baseaddr = 0;
329     }
330     return Error::success();
331   }
332 
333   // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)334   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
335     read();
336     if (Tok.K != Identifier)
337       return createError("identifier expected, but got " + Tok.Value);
338     StringRef V1, V2;
339     std::tie(V1, V2) = Tok.Value.split('.');
340     if (V1.getAsInteger(10, *Major))
341       return createError("integer expected, but got " + Tok.Value);
342     if (V2.empty())
343       *Minor = 0;
344     else if (V2.getAsInteger(10, *Minor))
345       return createError("integer expected, but got " + Tok.Value);
346     return Error::success();
347   }
348 
349   Lexer Lex;
350   Token Tok;
351   std::vector<Token> Stack;
352   MachineTypes Machine;
353   COFFModuleDefinition Info;
354   bool MingwDef;
355   bool AddUnderscores;
356 };
357 
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef,bool AddUnderscores)358 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
359                                                          MachineTypes Machine,
360                                                          bool MingwDef,
361                                                          bool AddUnderscores) {
362   return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
363 }
364 
365 } // namespace object
366 } // namespace llvm
367