//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
//
// The format of module-definition files is described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #include "llvm/Object/COFFModuleDefinition.h"
180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h"
200b57cec5SDimitry Andric #include "llvm/Object/COFFImportFile.h"
210b57cec5SDimitry Andric #include "llvm/Object/Error.h"
220b57cec5SDimitry Andric #include "llvm/Support/Error.h"
230b57cec5SDimitry Andric #include "llvm/Support/Path.h"
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric using namespace llvm::COFF;
260b57cec5SDimitry Andric using namespace llvm;
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric namespace llvm {
290b57cec5SDimitry Andric namespace object {
300b57cec5SDimitry Andric
/// Kinds of token the .def lexer can produce.
///
/// The enumerators keep their implicit, consecutive values starting at 0.
enum Kind {
  // Generic tokens.
  Unknown,    // Default kind of a default-constructed Token.
  Eof,        // End of input.
  Identifier, // Any word or quoted string that is not a keyword.

  // Punctuation.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords, matched by their exact upper-case spelling.
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion    // "VERSION"
};
500b57cec5SDimitry Andric
510b57cec5SDimitry Andric struct Token {
Tokenllvm::object::Token520b57cec5SDimitry Andric explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
530b57cec5SDimitry Andric Kind K;
540b57cec5SDimitry Andric StringRef Value;
550b57cec5SDimitry Andric };
560b57cec5SDimitry Andric
isDecorated(StringRef Sym,bool MingwDef)570b57cec5SDimitry Andric static bool isDecorated(StringRef Sym, bool MingwDef) {
580b57cec5SDimitry Andric // In def files, the symbols can either be listed decorated or undecorated.
590b57cec5SDimitry Andric //
600b57cec5SDimitry Andric // - For cdecl symbols, only the undecorated form is allowed.
610b57cec5SDimitry Andric // - For fastcall and vectorcall symbols, both fully decorated or
620b57cec5SDimitry Andric // undecorated forms can be present.
630b57cec5SDimitry Andric // - For stdcall symbols in non-MinGW environments, the decorated form is
640b57cec5SDimitry Andric // fully decorated with leading underscore and trailing stack argument
650b57cec5SDimitry Andric // size - like "_Func@0".
660b57cec5SDimitry Andric // - In MinGW def files, a decorated stdcall symbol does not include the
670b57cec5SDimitry Andric // leading underscore though, like "Func@0".
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric // This function controls whether a leading underscore should be added to
700b57cec5SDimitry Andric // the given symbol name or not. For MinGW, treat a stdcall symbol name such
710b57cec5SDimitry Andric // as "Func@0" as undecorated, i.e. a leading underscore must be added.
720b57cec5SDimitry Andric // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
730b57cec5SDimitry Andric // as decorated, i.e. don't add any more leading underscores.
740b57cec5SDimitry Andric // We can't check for a leading underscore here, since function names
750b57cec5SDimitry Andric // themselves can start with an underscore, while a second one still needs
760b57cec5SDimitry Andric // to be added.
775f757f3fSDimitry Andric return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
780b57cec5SDimitry Andric (!MingwDef && Sym.contains('@'));
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric class Lexer {
820b57cec5SDimitry Andric public:
Lexer(StringRef S)830b57cec5SDimitry Andric Lexer(StringRef S) : Buf(S) {}
840b57cec5SDimitry Andric
lex()850b57cec5SDimitry Andric Token lex() {
860b57cec5SDimitry Andric Buf = Buf.trim();
870b57cec5SDimitry Andric if (Buf.empty())
880b57cec5SDimitry Andric return Token(Eof);
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric switch (Buf[0]) {
910b57cec5SDimitry Andric case '\0':
920b57cec5SDimitry Andric return Token(Eof);
930b57cec5SDimitry Andric case ';': {
940b57cec5SDimitry Andric size_t End = Buf.find('\n');
950b57cec5SDimitry Andric Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
960b57cec5SDimitry Andric return lex();
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric case '=':
990b57cec5SDimitry Andric Buf = Buf.drop_front();
1005f757f3fSDimitry Andric if (Buf.starts_with("=")) {
1010b57cec5SDimitry Andric Buf = Buf.drop_front();
1020b57cec5SDimitry Andric return Token(EqualEqual, "==");
1030b57cec5SDimitry Andric }
1040b57cec5SDimitry Andric return Token(Equal, "=");
1050b57cec5SDimitry Andric case ',':
1060b57cec5SDimitry Andric Buf = Buf.drop_front();
1070b57cec5SDimitry Andric return Token(Comma, ",");
1080b57cec5SDimitry Andric case '"': {
1090b57cec5SDimitry Andric StringRef S;
1100b57cec5SDimitry Andric std::tie(S, Buf) = Buf.substr(1).split('"');
1110b57cec5SDimitry Andric return Token(Identifier, S);
1120b57cec5SDimitry Andric }
1130b57cec5SDimitry Andric default: {
1140b57cec5SDimitry Andric size_t End = Buf.find_first_of("=,;\r\n \t\v");
1150b57cec5SDimitry Andric StringRef Word = Buf.substr(0, End);
1160b57cec5SDimitry Andric Kind K = llvm::StringSwitch<Kind>(Word)
1170b57cec5SDimitry Andric .Case("BASE", KwBase)
1180b57cec5SDimitry Andric .Case("CONSTANT", KwConstant)
1190b57cec5SDimitry Andric .Case("DATA", KwData)
1200b57cec5SDimitry Andric .Case("EXPORTS", KwExports)
1210b57cec5SDimitry Andric .Case("HEAPSIZE", KwHeapsize)
1220b57cec5SDimitry Andric .Case("LIBRARY", KwLibrary)
1230b57cec5SDimitry Andric .Case("NAME", KwName)
1240b57cec5SDimitry Andric .Case("NONAME", KwNoname)
1250b57cec5SDimitry Andric .Case("PRIVATE", KwPrivate)
1260b57cec5SDimitry Andric .Case("STACKSIZE", KwStacksize)
1270b57cec5SDimitry Andric .Case("VERSION", KwVersion)
1280b57cec5SDimitry Andric .Default(Identifier);
1290b57cec5SDimitry Andric Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
1300b57cec5SDimitry Andric return Token(K, Word);
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric private:
1360b57cec5SDimitry Andric StringRef Buf;
1370b57cec5SDimitry Andric };
1380b57cec5SDimitry Andric
1390b57cec5SDimitry Andric class Parser {
1400b57cec5SDimitry Andric public:
Parser(StringRef S,MachineTypes M,bool B,bool AU)14106c3fb27SDimitry Andric explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
14206c3fb27SDimitry Andric : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
14306c3fb27SDimitry Andric if (Machine != IMAGE_FILE_MACHINE_I386)
14406c3fb27SDimitry Andric AddUnderscores = false;
14506c3fb27SDimitry Andric }
1460b57cec5SDimitry Andric
parse()1470b57cec5SDimitry Andric Expected<COFFModuleDefinition> parse() {
1480b57cec5SDimitry Andric do {
1490b57cec5SDimitry Andric if (Error Err = parseOne())
1500b57cec5SDimitry Andric return std::move(Err);
1510b57cec5SDimitry Andric } while (Tok.K != Eof);
1520b57cec5SDimitry Andric return Info;
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric private:
read()1560b57cec5SDimitry Andric void read() {
1570b57cec5SDimitry Andric if (Stack.empty()) {
1580b57cec5SDimitry Andric Tok = Lex.lex();
1590b57cec5SDimitry Andric return;
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric Tok = Stack.back();
1620b57cec5SDimitry Andric Stack.pop_back();
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric
readAsInt(uint64_t * I)1650b57cec5SDimitry Andric Error readAsInt(uint64_t *I) {
1660b57cec5SDimitry Andric read();
1670b57cec5SDimitry Andric if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
1680b57cec5SDimitry Andric return createError("integer expected");
1690b57cec5SDimitry Andric return Error::success();
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric
expect(Kind Expected,StringRef Msg)1720b57cec5SDimitry Andric Error expect(Kind Expected, StringRef Msg) {
1730b57cec5SDimitry Andric read();
1740b57cec5SDimitry Andric if (Tok.K != Expected)
1750b57cec5SDimitry Andric return createError(Msg);
1760b57cec5SDimitry Andric return Error::success();
1770b57cec5SDimitry Andric }
1780b57cec5SDimitry Andric
unget()1790b57cec5SDimitry Andric void unget() { Stack.push_back(Tok); }
1800b57cec5SDimitry Andric
parseOne()1810b57cec5SDimitry Andric Error parseOne() {
1820b57cec5SDimitry Andric read();
1830b57cec5SDimitry Andric switch (Tok.K) {
1840b57cec5SDimitry Andric case Eof:
1850b57cec5SDimitry Andric return Error::success();
1860b57cec5SDimitry Andric case KwExports:
1870b57cec5SDimitry Andric for (;;) {
1880b57cec5SDimitry Andric read();
1890b57cec5SDimitry Andric if (Tok.K != Identifier) {
1900b57cec5SDimitry Andric unget();
1910b57cec5SDimitry Andric return Error::success();
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric if (Error Err = parseExport())
1940b57cec5SDimitry Andric return Err;
1950b57cec5SDimitry Andric }
1960b57cec5SDimitry Andric case KwHeapsize:
1970b57cec5SDimitry Andric return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
1980b57cec5SDimitry Andric case KwStacksize:
1990b57cec5SDimitry Andric return parseNumbers(&Info.StackReserve, &Info.StackCommit);
2000b57cec5SDimitry Andric case KwLibrary:
2010b57cec5SDimitry Andric case KwName: {
2020b57cec5SDimitry Andric bool IsDll = Tok.K == KwLibrary; // Check before parseName.
2030b57cec5SDimitry Andric std::string Name;
2040b57cec5SDimitry Andric if (Error Err = parseName(&Name, &Info.ImageBase))
2050b57cec5SDimitry Andric return Err;
2060b57cec5SDimitry Andric
2070b57cec5SDimitry Andric Info.ImportName = Name;
2080b57cec5SDimitry Andric
2090b57cec5SDimitry Andric // Set the output file, but don't override /out if it was already passed.
2100b57cec5SDimitry Andric if (Info.OutputFile.empty()) {
2110b57cec5SDimitry Andric Info.OutputFile = Name;
2120b57cec5SDimitry Andric // Append the appropriate file extension if not already present.
2130b57cec5SDimitry Andric if (!sys::path::has_extension(Name))
2140b57cec5SDimitry Andric Info.OutputFile += IsDll ? ".dll" : ".exe";
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric
2170b57cec5SDimitry Andric return Error::success();
2180b57cec5SDimitry Andric }
2190b57cec5SDimitry Andric case KwVersion:
2200b57cec5SDimitry Andric return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
2210b57cec5SDimitry Andric default:
2220b57cec5SDimitry Andric return createError("unknown directive: " + Tok.Value);
2230b57cec5SDimitry Andric }
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric
parseExport()2260b57cec5SDimitry Andric Error parseExport() {
2270b57cec5SDimitry Andric COFFShortExport E;
2285ffd83dbSDimitry Andric E.Name = std::string(Tok.Value);
2290b57cec5SDimitry Andric read();
2300b57cec5SDimitry Andric if (Tok.K == Equal) {
2310b57cec5SDimitry Andric read();
2320b57cec5SDimitry Andric if (Tok.K != Identifier)
2330b57cec5SDimitry Andric return createError("identifier expected, but got " + Tok.Value);
2340b57cec5SDimitry Andric E.ExtName = E.Name;
2355ffd83dbSDimitry Andric E.Name = std::string(Tok.Value);
2360b57cec5SDimitry Andric } else {
2370b57cec5SDimitry Andric unget();
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric
24006c3fb27SDimitry Andric if (AddUnderscores) {
2410b57cec5SDimitry Andric if (!isDecorated(E.Name, MingwDef))
2420b57cec5SDimitry Andric E.Name = (std::string("_").append(E.Name));
2430b57cec5SDimitry Andric if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
2440b57cec5SDimitry Andric E.ExtName = (std::string("_").append(E.ExtName));
2450b57cec5SDimitry Andric }
2460b57cec5SDimitry Andric
2470b57cec5SDimitry Andric for (;;) {
2480b57cec5SDimitry Andric read();
2490b57cec5SDimitry Andric if (Tok.K == Identifier && Tok.Value[0] == '@') {
2500b57cec5SDimitry Andric if (Tok.Value == "@") {
2510b57cec5SDimitry Andric // "foo @ 10"
2520b57cec5SDimitry Andric read();
2530b57cec5SDimitry Andric Tok.Value.getAsInteger(10, E.Ordinal);
2540b57cec5SDimitry Andric } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2550b57cec5SDimitry Andric // "foo \n @bar" - Not an ordinal modifier at all, but the next
2560b57cec5SDimitry Andric // export (fastcall decorated) - complete the current one.
2570b57cec5SDimitry Andric unget();
2580b57cec5SDimitry Andric Info.Exports.push_back(E);
2590b57cec5SDimitry Andric return Error::success();
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric // "foo @10"
2620b57cec5SDimitry Andric read();
2630b57cec5SDimitry Andric if (Tok.K == KwNoname) {
2640b57cec5SDimitry Andric E.Noname = true;
2650b57cec5SDimitry Andric } else {
2660b57cec5SDimitry Andric unget();
2670b57cec5SDimitry Andric }
2680b57cec5SDimitry Andric continue;
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric if (Tok.K == KwData) {
2710b57cec5SDimitry Andric E.Data = true;
2720b57cec5SDimitry Andric continue;
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric if (Tok.K == KwConstant) {
2750b57cec5SDimitry Andric E.Constant = true;
2760b57cec5SDimitry Andric continue;
2770b57cec5SDimitry Andric }
2780b57cec5SDimitry Andric if (Tok.K == KwPrivate) {
2790b57cec5SDimitry Andric E.Private = true;
2800b57cec5SDimitry Andric continue;
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric if (Tok.K == EqualEqual) {
2830b57cec5SDimitry Andric read();
2845ffd83dbSDimitry Andric E.AliasTarget = std::string(Tok.Value);
28506c3fb27SDimitry Andric if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
2860b57cec5SDimitry Andric E.AliasTarget = std::string("_").append(E.AliasTarget);
2870b57cec5SDimitry Andric continue;
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric unget();
2900b57cec5SDimitry Andric Info.Exports.push_back(E);
2910b57cec5SDimitry Andric return Error::success();
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric }
2940b57cec5SDimitry Andric
2950b57cec5SDimitry Andric // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)2960b57cec5SDimitry Andric Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
2970b57cec5SDimitry Andric if (Error Err = readAsInt(Reserve))
2980b57cec5SDimitry Andric return Err;
2990b57cec5SDimitry Andric read();
3000b57cec5SDimitry Andric if (Tok.K != Comma) {
3010b57cec5SDimitry Andric unget();
3020b57cec5SDimitry Andric Commit = nullptr;
3030b57cec5SDimitry Andric return Error::success();
3040b57cec5SDimitry Andric }
3050b57cec5SDimitry Andric if (Error Err = readAsInt(Commit))
3060b57cec5SDimitry Andric return Err;
3070b57cec5SDimitry Andric return Error::success();
3080b57cec5SDimitry Andric }
3090b57cec5SDimitry Andric
3100b57cec5SDimitry Andric // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)3110b57cec5SDimitry Andric Error parseName(std::string *Out, uint64_t *Baseaddr) {
3120b57cec5SDimitry Andric read();
3130b57cec5SDimitry Andric if (Tok.K == Identifier) {
3145ffd83dbSDimitry Andric *Out = std::string(Tok.Value);
3150b57cec5SDimitry Andric } else {
3160b57cec5SDimitry Andric *Out = "";
3170b57cec5SDimitry Andric unget();
3180b57cec5SDimitry Andric return Error::success();
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric read();
3210b57cec5SDimitry Andric if (Tok.K == KwBase) {
3220b57cec5SDimitry Andric if (Error Err = expect(Equal, "'=' expected"))
3230b57cec5SDimitry Andric return Err;
3240b57cec5SDimitry Andric if (Error Err = readAsInt(Baseaddr))
3250b57cec5SDimitry Andric return Err;
3260b57cec5SDimitry Andric } else {
3270b57cec5SDimitry Andric unget();
3280b57cec5SDimitry Andric *Baseaddr = 0;
3290b57cec5SDimitry Andric }
3300b57cec5SDimitry Andric return Error::success();
3310b57cec5SDimitry Andric }
3320b57cec5SDimitry Andric
3330b57cec5SDimitry Andric // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)3340b57cec5SDimitry Andric Error parseVersion(uint32_t *Major, uint32_t *Minor) {
3350b57cec5SDimitry Andric read();
3360b57cec5SDimitry Andric if (Tok.K != Identifier)
3370b57cec5SDimitry Andric return createError("identifier expected, but got " + Tok.Value);
3380b57cec5SDimitry Andric StringRef V1, V2;
3390b57cec5SDimitry Andric std::tie(V1, V2) = Tok.Value.split('.');
3400b57cec5SDimitry Andric if (V1.getAsInteger(10, *Major))
3410b57cec5SDimitry Andric return createError("integer expected, but got " + Tok.Value);
3420b57cec5SDimitry Andric if (V2.empty())
3430b57cec5SDimitry Andric *Minor = 0;
3440b57cec5SDimitry Andric else if (V2.getAsInteger(10, *Minor))
3450b57cec5SDimitry Andric return createError("integer expected, but got " + Tok.Value);
3460b57cec5SDimitry Andric return Error::success();
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric
3490b57cec5SDimitry Andric Lexer Lex;
3500b57cec5SDimitry Andric Token Tok;
3510b57cec5SDimitry Andric std::vector<Token> Stack;
3520b57cec5SDimitry Andric MachineTypes Machine;
3530b57cec5SDimitry Andric COFFModuleDefinition Info;
3540b57cec5SDimitry Andric bool MingwDef;
35506c3fb27SDimitry Andric bool AddUnderscores;
3560b57cec5SDimitry Andric };
3570b57cec5SDimitry Andric
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef,bool AddUnderscores)3580b57cec5SDimitry Andric Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3590b57cec5SDimitry Andric MachineTypes Machine,
36006c3fb27SDimitry Andric bool MingwDef,
36106c3fb27SDimitry Andric bool AddUnderscores) {
36206c3fb27SDimitry Andric return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
3630b57cec5SDimitry Andric }
3640b57cec5SDimitry Andric
3650b57cec5SDimitry Andric } // namespace object
3660b57cec5SDimitry Andric } // namespace llvm
367