1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/COFFImportFile.h"
22 #include "llvm/Object/Error.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm::COFF;
28 using namespace llvm;
29 
30 namespace llvm {
31 namespace object {
32 
/// Categories of tokens handed from the Lexer to the Parser.
enum Kind {
  // Sentinels.
  Unknown, // Kind of a default-constructed Token.
  Eof,     // End of input.

  // Free-form words and punctuation.
  Identifier, // Any word that is not a keyword, or a quoted string.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords; each is matched against its upper-case spelling.
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
52 
53 struct Token {
Tokenllvm::object::Token54   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55   Kind K;
56   StringRef Value;
57 };
58 
isDecorated(StringRef Sym,bool MingwDef)59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60   // In def files, the symbols can either be listed decorated or undecorated.
61   //
62   // - For cdecl symbols, only the undecorated form is allowed.
63   // - For fastcall and vectorcall symbols, both fully decorated or
64   //   undecorated forms can be present.
65   // - For stdcall symbols in non-MinGW environments, the decorated form is
66   //   fully decorated with leading underscore and trailing stack argument
67   //   size - like "_Func@0".
68   // - In MinGW def files, a decorated stdcall symbol does not include the
69   //   leading underscore though, like "Func@0".
70 
71   // This function controls whether a leading underscore should be added to
72   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75   // as decorated, i.e. don't add any more leading underscores.
76   // We can't check for a leading underscore here, since function names
77   // themselves can start with an underscore, while a second one still needs
78   // to be added.
79   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80          (!MingwDef && Sym.contains('@'));
81 }
82 
83 class Lexer {
84 public:
Lexer(StringRef S)85   Lexer(StringRef S) : Buf(S) {}
86 
lex()87   Token lex() {
88     Buf = Buf.trim();
89     if (Buf.empty())
90       return Token(Eof);
91 
92     switch (Buf[0]) {
93     case '\0':
94       return Token(Eof);
95     case ';': {
96       size_t End = Buf.find('\n');
97       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
98       return lex();
99     }
100     case '=':
101       Buf = Buf.drop_front();
102       if (Buf.startswith("=")) {
103         Buf = Buf.drop_front();
104         return Token(EqualEqual, "==");
105       }
106       return Token(Equal, "=");
107     case ',':
108       Buf = Buf.drop_front();
109       return Token(Comma, ",");
110     case '"': {
111       StringRef S;
112       std::tie(S, Buf) = Buf.substr(1).split('"');
113       return Token(Identifier, S);
114     }
115     default: {
116       size_t End = Buf.find_first_of("=,;\r\n \t\v");
117       StringRef Word = Buf.substr(0, End);
118       Kind K = llvm::StringSwitch<Kind>(Word)
119                    .Case("BASE", KwBase)
120                    .Case("CONSTANT", KwConstant)
121                    .Case("DATA", KwData)
122                    .Case("EXPORTS", KwExports)
123                    .Case("HEAPSIZE", KwHeapsize)
124                    .Case("LIBRARY", KwLibrary)
125                    .Case("NAME", KwName)
126                    .Case("NONAME", KwNoname)
127                    .Case("PRIVATE", KwPrivate)
128                    .Case("STACKSIZE", KwStacksize)
129                    .Case("VERSION", KwVersion)
130                    .Default(Identifier);
131       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
132       return Token(K, Word);
133     }
134     }
135   }
136 
137 private:
138   StringRef Buf;
139 };
140 
141 class Parser {
142 public:
Parser(StringRef S,MachineTypes M,bool B)143   explicit Parser(StringRef S, MachineTypes M, bool B)
144       : Lex(S), Machine(M), MingwDef(B) {}
145 
parse()146   Expected<COFFModuleDefinition> parse() {
147     do {
148       if (Error Err = parseOne())
149         return std::move(Err);
150     } while (Tok.K != Eof);
151     return Info;
152   }
153 
154 private:
read()155   void read() {
156     if (Stack.empty()) {
157       Tok = Lex.lex();
158       return;
159     }
160     Tok = Stack.back();
161     Stack.pop_back();
162   }
163 
readAsInt(uint64_t * I)164   Error readAsInt(uint64_t *I) {
165     read();
166     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
167       return createError("integer expected");
168     return Error::success();
169   }
170 
expect(Kind Expected,StringRef Msg)171   Error expect(Kind Expected, StringRef Msg) {
172     read();
173     if (Tok.K != Expected)
174       return createError(Msg);
175     return Error::success();
176   }
177 
unget()178   void unget() { Stack.push_back(Tok); }
179 
parseOne()180   Error parseOne() {
181     read();
182     switch (Tok.K) {
183     case Eof:
184       return Error::success();
185     case KwExports:
186       for (;;) {
187         read();
188         if (Tok.K != Identifier) {
189           unget();
190           return Error::success();
191         }
192         if (Error Err = parseExport())
193           return Err;
194       }
195     case KwHeapsize:
196       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
197     case KwStacksize:
198       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
199     case KwLibrary:
200     case KwName: {
201       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
202       std::string Name;
203       if (Error Err = parseName(&Name, &Info.ImageBase))
204         return Err;
205 
206       Info.ImportName = Name;
207 
208       // Set the output file, but don't override /out if it was already passed.
209       if (Info.OutputFile.empty()) {
210         Info.OutputFile = Name;
211         // Append the appropriate file extension if not already present.
212         if (!sys::path::has_extension(Name))
213           Info.OutputFile += IsDll ? ".dll" : ".exe";
214       }
215 
216       return Error::success();
217     }
218     case KwVersion:
219       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
220     default:
221       return createError("unknown directive: " + Tok.Value);
222     }
223   }
224 
parseExport()225   Error parseExport() {
226     COFFShortExport E;
227     E.Name = std::string(Tok.Value);
228     read();
229     if (Tok.K == Equal) {
230       read();
231       if (Tok.K != Identifier)
232         return createError("identifier expected, but got " + Tok.Value);
233       E.ExtName = E.Name;
234       E.Name = std::string(Tok.Value);
235     } else {
236       unget();
237     }
238 
239     if (Machine == IMAGE_FILE_MACHINE_I386) {
240       if (!isDecorated(E.Name, MingwDef))
241         E.Name = (std::string("_").append(E.Name));
242       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
243         E.ExtName = (std::string("_").append(E.ExtName));
244     }
245 
246     for (;;) {
247       read();
248       if (Tok.K == Identifier && Tok.Value[0] == '@') {
249         if (Tok.Value == "@") {
250           // "foo @ 10"
251           read();
252           Tok.Value.getAsInteger(10, E.Ordinal);
253         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
254           // "foo \n @bar" - Not an ordinal modifier at all, but the next
255           // export (fastcall decorated) - complete the current one.
256           unget();
257           Info.Exports.push_back(E);
258           return Error::success();
259         }
260         // "foo @10"
261         read();
262         if (Tok.K == KwNoname) {
263           E.Noname = true;
264         } else {
265           unget();
266         }
267         continue;
268       }
269       if (Tok.K == KwData) {
270         E.Data = true;
271         continue;
272       }
273       if (Tok.K == KwConstant) {
274         E.Constant = true;
275         continue;
276       }
277       if (Tok.K == KwPrivate) {
278         E.Private = true;
279         continue;
280       }
281       if (Tok.K == EqualEqual) {
282         read();
283         E.AliasTarget = std::string(Tok.Value);
284         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
285           E.AliasTarget = std::string("_").append(E.AliasTarget);
286         continue;
287       }
288       unget();
289       Info.Exports.push_back(E);
290       return Error::success();
291     }
292   }
293 
294   // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)295   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
296     if (Error Err = readAsInt(Reserve))
297       return Err;
298     read();
299     if (Tok.K != Comma) {
300       unget();
301       Commit = nullptr;
302       return Error::success();
303     }
304     if (Error Err = readAsInt(Commit))
305       return Err;
306     return Error::success();
307   }
308 
309   // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)310   Error parseName(std::string *Out, uint64_t *Baseaddr) {
311     read();
312     if (Tok.K == Identifier) {
313       *Out = std::string(Tok.Value);
314     } else {
315       *Out = "";
316       unget();
317       return Error::success();
318     }
319     read();
320     if (Tok.K == KwBase) {
321       if (Error Err = expect(Equal, "'=' expected"))
322         return Err;
323       if (Error Err = readAsInt(Baseaddr))
324         return Err;
325     } else {
326       unget();
327       *Baseaddr = 0;
328     }
329     return Error::success();
330   }
331 
332   // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)333   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
334     read();
335     if (Tok.K != Identifier)
336       return createError("identifier expected, but got " + Tok.Value);
337     StringRef V1, V2;
338     std::tie(V1, V2) = Tok.Value.split('.');
339     if (V1.getAsInteger(10, *Major))
340       return createError("integer expected, but got " + Tok.Value);
341     if (V2.empty())
342       *Minor = 0;
343     else if (V2.getAsInteger(10, *Minor))
344       return createError("integer expected, but got " + Tok.Value);
345     return Error::success();
346   }
347 
348   Lexer Lex;
349   Token Tok;
350   std::vector<Token> Stack;
351   MachineTypes Machine;
352   COFFModuleDefinition Info;
353   bool MingwDef;
354 };
355 
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef)356 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
357                                                          MachineTypes Machine,
358                                                          bool MingwDef) {
359   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
360 }
361 
362 } // namespace object
363 } // namespace llvm
364