//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
//
// The format of module-definition files is described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "llvm/Object/COFFModuleDefinition.h"
180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h"
200b57cec5SDimitry Andric #include "llvm/Object/COFFImportFile.h"
210b57cec5SDimitry Andric #include "llvm/Object/Error.h"
220b57cec5SDimitry Andric #include "llvm/Support/Error.h"
230b57cec5SDimitry Andric #include "llvm/Support/Path.h"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace llvm::COFF;
260b57cec5SDimitry Andric using namespace llvm;
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric namespace llvm {
290b57cec5SDimitry Andric namespace object {
300b57cec5SDimitry Andric 
/// Categories of tokens handed from the lexer to the parser.
enum Kind {
  // Sentinel and generic tokens.
  Unknown,    // Default kind of a default-constructed Token.
  Eof,        // End of input.
  Identifier, // Any bare word that is not a keyword, or a quoted string.

  // Punctuation.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords, matched by their upper-case spelling in the lexer.
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric struct Token {
Tokenllvm::object::Token520b57cec5SDimitry Andric   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
530b57cec5SDimitry Andric   Kind K;
540b57cec5SDimitry Andric   StringRef Value;
550b57cec5SDimitry Andric };
560b57cec5SDimitry Andric 
isDecorated(StringRef Sym,bool MingwDef)570b57cec5SDimitry Andric static bool isDecorated(StringRef Sym, bool MingwDef) {
580b57cec5SDimitry Andric   // In def files, the symbols can either be listed decorated or undecorated.
590b57cec5SDimitry Andric   //
600b57cec5SDimitry Andric   // - For cdecl symbols, only the undecorated form is allowed.
610b57cec5SDimitry Andric   // - For fastcall and vectorcall symbols, both fully decorated or
620b57cec5SDimitry Andric   //   undecorated forms can be present.
630b57cec5SDimitry Andric   // - For stdcall symbols in non-MinGW environments, the decorated form is
640b57cec5SDimitry Andric   //   fully decorated with leading underscore and trailing stack argument
650b57cec5SDimitry Andric   //   size - like "_Func@0".
660b57cec5SDimitry Andric   // - In MinGW def files, a decorated stdcall symbol does not include the
670b57cec5SDimitry Andric   //   leading underscore though, like "Func@0".
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric   // This function controls whether a leading underscore should be added to
700b57cec5SDimitry Andric   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
710b57cec5SDimitry Andric   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
720b57cec5SDimitry Andric   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
730b57cec5SDimitry Andric   // as decorated, i.e. don't add any more leading underscores.
740b57cec5SDimitry Andric   // We can't check for a leading underscore here, since function names
750b57cec5SDimitry Andric   // themselves can start with an underscore, while a second one still needs
760b57cec5SDimitry Andric   // to be added.
775f757f3fSDimitry Andric   return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
780b57cec5SDimitry Andric          (!MingwDef && Sym.contains('@'));
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric class Lexer {
820b57cec5SDimitry Andric public:
Lexer(StringRef S)830b57cec5SDimitry Andric   Lexer(StringRef S) : Buf(S) {}
840b57cec5SDimitry Andric 
lex()850b57cec5SDimitry Andric   Token lex() {
860b57cec5SDimitry Andric     Buf = Buf.trim();
870b57cec5SDimitry Andric     if (Buf.empty())
880b57cec5SDimitry Andric       return Token(Eof);
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric     switch (Buf[0]) {
910b57cec5SDimitry Andric     case '\0':
920b57cec5SDimitry Andric       return Token(Eof);
930b57cec5SDimitry Andric     case ';': {
940b57cec5SDimitry Andric       size_t End = Buf.find('\n');
950b57cec5SDimitry Andric       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
960b57cec5SDimitry Andric       return lex();
970b57cec5SDimitry Andric     }
980b57cec5SDimitry Andric     case '=':
990b57cec5SDimitry Andric       Buf = Buf.drop_front();
1005f757f3fSDimitry Andric       if (Buf.starts_with("=")) {
1010b57cec5SDimitry Andric         Buf = Buf.drop_front();
1020b57cec5SDimitry Andric         return Token(EqualEqual, "==");
1030b57cec5SDimitry Andric       }
1040b57cec5SDimitry Andric       return Token(Equal, "=");
1050b57cec5SDimitry Andric     case ',':
1060b57cec5SDimitry Andric       Buf = Buf.drop_front();
1070b57cec5SDimitry Andric       return Token(Comma, ",");
1080b57cec5SDimitry Andric     case '"': {
1090b57cec5SDimitry Andric       StringRef S;
1100b57cec5SDimitry Andric       std::tie(S, Buf) = Buf.substr(1).split('"');
1110b57cec5SDimitry Andric       return Token(Identifier, S);
1120b57cec5SDimitry Andric     }
1130b57cec5SDimitry Andric     default: {
1140b57cec5SDimitry Andric       size_t End = Buf.find_first_of("=,;\r\n \t\v");
1150b57cec5SDimitry Andric       StringRef Word = Buf.substr(0, End);
1160b57cec5SDimitry Andric       Kind K = llvm::StringSwitch<Kind>(Word)
1170b57cec5SDimitry Andric                    .Case("BASE", KwBase)
1180b57cec5SDimitry Andric                    .Case("CONSTANT", KwConstant)
1190b57cec5SDimitry Andric                    .Case("DATA", KwData)
1200b57cec5SDimitry Andric                    .Case("EXPORTS", KwExports)
1210b57cec5SDimitry Andric                    .Case("HEAPSIZE", KwHeapsize)
1220b57cec5SDimitry Andric                    .Case("LIBRARY", KwLibrary)
1230b57cec5SDimitry Andric                    .Case("NAME", KwName)
1240b57cec5SDimitry Andric                    .Case("NONAME", KwNoname)
1250b57cec5SDimitry Andric                    .Case("PRIVATE", KwPrivate)
1260b57cec5SDimitry Andric                    .Case("STACKSIZE", KwStacksize)
1270b57cec5SDimitry Andric                    .Case("VERSION", KwVersion)
1280b57cec5SDimitry Andric                    .Default(Identifier);
1290b57cec5SDimitry Andric       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
1300b57cec5SDimitry Andric       return Token(K, Word);
1310b57cec5SDimitry Andric     }
1320b57cec5SDimitry Andric     }
1330b57cec5SDimitry Andric   }
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric private:
1360b57cec5SDimitry Andric   StringRef Buf;
1370b57cec5SDimitry Andric };
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric class Parser {
1400b57cec5SDimitry Andric public:
Parser(StringRef S,MachineTypes M,bool B,bool AU)14106c3fb27SDimitry Andric   explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
14206c3fb27SDimitry Andric       : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
14306c3fb27SDimitry Andric     if (Machine != IMAGE_FILE_MACHINE_I386)
14406c3fb27SDimitry Andric       AddUnderscores = false;
14506c3fb27SDimitry Andric   }
1460b57cec5SDimitry Andric 
parse()1470b57cec5SDimitry Andric   Expected<COFFModuleDefinition> parse() {
1480b57cec5SDimitry Andric     do {
1490b57cec5SDimitry Andric       if (Error Err = parseOne())
1500b57cec5SDimitry Andric         return std::move(Err);
1510b57cec5SDimitry Andric     } while (Tok.K != Eof);
1520b57cec5SDimitry Andric     return Info;
1530b57cec5SDimitry Andric   }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric private:
read()1560b57cec5SDimitry Andric   void read() {
1570b57cec5SDimitry Andric     if (Stack.empty()) {
1580b57cec5SDimitry Andric       Tok = Lex.lex();
1590b57cec5SDimitry Andric       return;
1600b57cec5SDimitry Andric     }
1610b57cec5SDimitry Andric     Tok = Stack.back();
1620b57cec5SDimitry Andric     Stack.pop_back();
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric 
readAsInt(uint64_t * I)1650b57cec5SDimitry Andric   Error readAsInt(uint64_t *I) {
1660b57cec5SDimitry Andric     read();
1670b57cec5SDimitry Andric     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
1680b57cec5SDimitry Andric       return createError("integer expected");
1690b57cec5SDimitry Andric     return Error::success();
1700b57cec5SDimitry Andric   }
1710b57cec5SDimitry Andric 
expect(Kind Expected,StringRef Msg)1720b57cec5SDimitry Andric   Error expect(Kind Expected, StringRef Msg) {
1730b57cec5SDimitry Andric     read();
1740b57cec5SDimitry Andric     if (Tok.K != Expected)
1750b57cec5SDimitry Andric       return createError(Msg);
1760b57cec5SDimitry Andric     return Error::success();
1770b57cec5SDimitry Andric   }
1780b57cec5SDimitry Andric 
unget()1790b57cec5SDimitry Andric   void unget() { Stack.push_back(Tok); }
1800b57cec5SDimitry Andric 
parseOne()1810b57cec5SDimitry Andric   Error parseOne() {
1820b57cec5SDimitry Andric     read();
1830b57cec5SDimitry Andric     switch (Tok.K) {
1840b57cec5SDimitry Andric     case Eof:
1850b57cec5SDimitry Andric       return Error::success();
1860b57cec5SDimitry Andric     case KwExports:
1870b57cec5SDimitry Andric       for (;;) {
1880b57cec5SDimitry Andric         read();
1890b57cec5SDimitry Andric         if (Tok.K != Identifier) {
1900b57cec5SDimitry Andric           unget();
1910b57cec5SDimitry Andric           return Error::success();
1920b57cec5SDimitry Andric         }
1930b57cec5SDimitry Andric         if (Error Err = parseExport())
1940b57cec5SDimitry Andric           return Err;
1950b57cec5SDimitry Andric       }
1960b57cec5SDimitry Andric     case KwHeapsize:
1970b57cec5SDimitry Andric       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
1980b57cec5SDimitry Andric     case KwStacksize:
1990b57cec5SDimitry Andric       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
2000b57cec5SDimitry Andric     case KwLibrary:
2010b57cec5SDimitry Andric     case KwName: {
2020b57cec5SDimitry Andric       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
2030b57cec5SDimitry Andric       std::string Name;
2040b57cec5SDimitry Andric       if (Error Err = parseName(&Name, &Info.ImageBase))
2050b57cec5SDimitry Andric         return Err;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric       Info.ImportName = Name;
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric       // Set the output file, but don't override /out if it was already passed.
2100b57cec5SDimitry Andric       if (Info.OutputFile.empty()) {
2110b57cec5SDimitry Andric         Info.OutputFile = Name;
2120b57cec5SDimitry Andric         // Append the appropriate file extension if not already present.
2130b57cec5SDimitry Andric         if (!sys::path::has_extension(Name))
2140b57cec5SDimitry Andric           Info.OutputFile += IsDll ? ".dll" : ".exe";
2150b57cec5SDimitry Andric       }
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric       return Error::success();
2180b57cec5SDimitry Andric     }
2190b57cec5SDimitry Andric     case KwVersion:
2200b57cec5SDimitry Andric       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
2210b57cec5SDimitry Andric     default:
2220b57cec5SDimitry Andric       return createError("unknown directive: " + Tok.Value);
2230b57cec5SDimitry Andric     }
2240b57cec5SDimitry Andric   }
2250b57cec5SDimitry Andric 
parseExport()2260b57cec5SDimitry Andric   Error parseExport() {
2270b57cec5SDimitry Andric     COFFShortExport E;
2285ffd83dbSDimitry Andric     E.Name = std::string(Tok.Value);
2290b57cec5SDimitry Andric     read();
2300b57cec5SDimitry Andric     if (Tok.K == Equal) {
2310b57cec5SDimitry Andric       read();
2320b57cec5SDimitry Andric       if (Tok.K != Identifier)
2330b57cec5SDimitry Andric         return createError("identifier expected, but got " + Tok.Value);
2340b57cec5SDimitry Andric       E.ExtName = E.Name;
2355ffd83dbSDimitry Andric       E.Name = std::string(Tok.Value);
2360b57cec5SDimitry Andric     } else {
2370b57cec5SDimitry Andric       unget();
2380b57cec5SDimitry Andric     }
2390b57cec5SDimitry Andric 
24006c3fb27SDimitry Andric     if (AddUnderscores) {
2410b57cec5SDimitry Andric       if (!isDecorated(E.Name, MingwDef))
2420b57cec5SDimitry Andric         E.Name = (std::string("_").append(E.Name));
2430b57cec5SDimitry Andric       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
2440b57cec5SDimitry Andric         E.ExtName = (std::string("_").append(E.ExtName));
2450b57cec5SDimitry Andric     }
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric     for (;;) {
2480b57cec5SDimitry Andric       read();
2490b57cec5SDimitry Andric       if (Tok.K == Identifier && Tok.Value[0] == '@') {
2500b57cec5SDimitry Andric         if (Tok.Value == "@") {
2510b57cec5SDimitry Andric           // "foo @ 10"
2520b57cec5SDimitry Andric           read();
2530b57cec5SDimitry Andric           Tok.Value.getAsInteger(10, E.Ordinal);
2540b57cec5SDimitry Andric         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2550b57cec5SDimitry Andric           // "foo \n @bar" - Not an ordinal modifier at all, but the next
2560b57cec5SDimitry Andric           // export (fastcall decorated) - complete the current one.
2570b57cec5SDimitry Andric           unget();
2580b57cec5SDimitry Andric           Info.Exports.push_back(E);
2590b57cec5SDimitry Andric           return Error::success();
2600b57cec5SDimitry Andric         }
2610b57cec5SDimitry Andric         // "foo @10"
2620b57cec5SDimitry Andric         read();
2630b57cec5SDimitry Andric         if (Tok.K == KwNoname) {
2640b57cec5SDimitry Andric           E.Noname = true;
2650b57cec5SDimitry Andric         } else {
2660b57cec5SDimitry Andric           unget();
2670b57cec5SDimitry Andric         }
2680b57cec5SDimitry Andric         continue;
2690b57cec5SDimitry Andric       }
2700b57cec5SDimitry Andric       if (Tok.K == KwData) {
2710b57cec5SDimitry Andric         E.Data = true;
2720b57cec5SDimitry Andric         continue;
2730b57cec5SDimitry Andric       }
2740b57cec5SDimitry Andric       if (Tok.K == KwConstant) {
2750b57cec5SDimitry Andric         E.Constant = true;
2760b57cec5SDimitry Andric         continue;
2770b57cec5SDimitry Andric       }
2780b57cec5SDimitry Andric       if (Tok.K == KwPrivate) {
2790b57cec5SDimitry Andric         E.Private = true;
2800b57cec5SDimitry Andric         continue;
2810b57cec5SDimitry Andric       }
2820b57cec5SDimitry Andric       if (Tok.K == EqualEqual) {
2830b57cec5SDimitry Andric         read();
2845ffd83dbSDimitry Andric         E.AliasTarget = std::string(Tok.Value);
28506c3fb27SDimitry Andric         if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
2860b57cec5SDimitry Andric           E.AliasTarget = std::string("_").append(E.AliasTarget);
2870b57cec5SDimitry Andric         continue;
2880b57cec5SDimitry Andric       }
2890b57cec5SDimitry Andric       unget();
2900b57cec5SDimitry Andric       Info.Exports.push_back(E);
2910b57cec5SDimitry Andric       return Error::success();
2920b57cec5SDimitry Andric     }
2930b57cec5SDimitry Andric   }
2940b57cec5SDimitry Andric 
2950b57cec5SDimitry Andric   // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)2960b57cec5SDimitry Andric   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
2970b57cec5SDimitry Andric     if (Error Err = readAsInt(Reserve))
2980b57cec5SDimitry Andric       return Err;
2990b57cec5SDimitry Andric     read();
3000b57cec5SDimitry Andric     if (Tok.K != Comma) {
3010b57cec5SDimitry Andric       unget();
3020b57cec5SDimitry Andric       Commit = nullptr;
3030b57cec5SDimitry Andric       return Error::success();
3040b57cec5SDimitry Andric     }
3050b57cec5SDimitry Andric     if (Error Err = readAsInt(Commit))
3060b57cec5SDimitry Andric       return Err;
3070b57cec5SDimitry Andric     return Error::success();
3080b57cec5SDimitry Andric   }
3090b57cec5SDimitry Andric 
3100b57cec5SDimitry Andric   // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)3110b57cec5SDimitry Andric   Error parseName(std::string *Out, uint64_t *Baseaddr) {
3120b57cec5SDimitry Andric     read();
3130b57cec5SDimitry Andric     if (Tok.K == Identifier) {
3145ffd83dbSDimitry Andric       *Out = std::string(Tok.Value);
3150b57cec5SDimitry Andric     } else {
3160b57cec5SDimitry Andric       *Out = "";
3170b57cec5SDimitry Andric       unget();
3180b57cec5SDimitry Andric       return Error::success();
3190b57cec5SDimitry Andric     }
3200b57cec5SDimitry Andric     read();
3210b57cec5SDimitry Andric     if (Tok.K == KwBase) {
3220b57cec5SDimitry Andric       if (Error Err = expect(Equal, "'=' expected"))
3230b57cec5SDimitry Andric         return Err;
3240b57cec5SDimitry Andric       if (Error Err = readAsInt(Baseaddr))
3250b57cec5SDimitry Andric         return Err;
3260b57cec5SDimitry Andric     } else {
3270b57cec5SDimitry Andric       unget();
3280b57cec5SDimitry Andric       *Baseaddr = 0;
3290b57cec5SDimitry Andric     }
3300b57cec5SDimitry Andric     return Error::success();
3310b57cec5SDimitry Andric   }
3320b57cec5SDimitry Andric 
3330b57cec5SDimitry Andric   // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)3340b57cec5SDimitry Andric   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
3350b57cec5SDimitry Andric     read();
3360b57cec5SDimitry Andric     if (Tok.K != Identifier)
3370b57cec5SDimitry Andric       return createError("identifier expected, but got " + Tok.Value);
3380b57cec5SDimitry Andric     StringRef V1, V2;
3390b57cec5SDimitry Andric     std::tie(V1, V2) = Tok.Value.split('.');
3400b57cec5SDimitry Andric     if (V1.getAsInteger(10, *Major))
3410b57cec5SDimitry Andric       return createError("integer expected, but got " + Tok.Value);
3420b57cec5SDimitry Andric     if (V2.empty())
3430b57cec5SDimitry Andric       *Minor = 0;
3440b57cec5SDimitry Andric     else if (V2.getAsInteger(10, *Minor))
3450b57cec5SDimitry Andric       return createError("integer expected, but got " + Tok.Value);
3460b57cec5SDimitry Andric     return Error::success();
3470b57cec5SDimitry Andric   }
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric   Lexer Lex;
3500b57cec5SDimitry Andric   Token Tok;
3510b57cec5SDimitry Andric   std::vector<Token> Stack;
3520b57cec5SDimitry Andric   MachineTypes Machine;
3530b57cec5SDimitry Andric   COFFModuleDefinition Info;
3540b57cec5SDimitry Andric   bool MingwDef;
35506c3fb27SDimitry Andric   bool AddUnderscores;
3560b57cec5SDimitry Andric };
3570b57cec5SDimitry Andric 
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef,bool AddUnderscores)3580b57cec5SDimitry Andric Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3590b57cec5SDimitry Andric                                                          MachineTypes Machine,
36006c3fb27SDimitry Andric                                                          bool MingwDef,
36106c3fb27SDimitry Andric                                                          bool AddUnderscores) {
36206c3fb27SDimitry Andric   return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric } // namespace object
3660b57cec5SDimitry Andric } // namespace llvm
367