1b5630dbaSDimitry Andric //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2b5630dbaSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b5630dbaSDimitry Andric //
7b5630dbaSDimitry Andric //===----------------------------------------------------------------------===//
8b5630dbaSDimitry Andric //
9b5630dbaSDimitry Andric // Windows-specific.
10b5630dbaSDimitry Andric // A parser for the module-definition file (.def file).
11b5630dbaSDimitry Andric //
// The format of module-definition files is described in this document:
13b5630dbaSDimitry Andric // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14b5630dbaSDimitry Andric //
15b5630dbaSDimitry Andric //===----------------------------------------------------------------------===//
16b5630dbaSDimitry Andric
17b5630dbaSDimitry Andric #include "llvm/Object/COFFModuleDefinition.h"
18b5630dbaSDimitry Andric #include "llvm/ADT/StringRef.h"
19b5630dbaSDimitry Andric #include "llvm/ADT/StringSwitch.h"
20b5630dbaSDimitry Andric #include "llvm/Object/COFFImportFile.h"
21b5630dbaSDimitry Andric #include "llvm/Object/Error.h"
22b5630dbaSDimitry Andric #include "llvm/Support/Error.h"
2393c91e39SDimitry Andric #include "llvm/Support/Path.h"
24b5630dbaSDimitry Andric
25b5630dbaSDimitry Andric using namespace llvm::COFF;
26b5630dbaSDimitry Andric using namespace llvm;
27b5630dbaSDimitry Andric
28b5630dbaSDimitry Andric namespace llvm {
29b5630dbaSDimitry Andric namespace object {
30b5630dbaSDimitry Andric
/// Token categories produced by the Lexer.
enum Kind {
  // Structural tokens.
  Unknown = 0, // Default kind of a default-constructed Token.
  Eof,         // End of input.
  Identifier,  // Bare word or quoted string that is not a keyword.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="

  // Keywords (matched case-sensitively by the Lexer).
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwExportAs,  // EXPORTAS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
51b5630dbaSDimitry Andric
52b5630dbaSDimitry Andric struct Token {
Tokenllvm::object::Token53b5630dbaSDimitry Andric explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
54b5630dbaSDimitry Andric Kind K;
55b5630dbaSDimitry Andric StringRef Value;
56b5630dbaSDimitry Andric };
57b5630dbaSDimitry Andric
isDecorated(StringRef Sym,bool MingwDef)5893c91e39SDimitry Andric static bool isDecorated(StringRef Sym, bool MingwDef) {
59044eb2f6SDimitry Andric // In def files, the symbols can either be listed decorated or undecorated.
60044eb2f6SDimitry Andric //
61044eb2f6SDimitry Andric // - For cdecl symbols, only the undecorated form is allowed.
62044eb2f6SDimitry Andric // - For fastcall and vectorcall symbols, both fully decorated or
63044eb2f6SDimitry Andric // undecorated forms can be present.
64044eb2f6SDimitry Andric // - For stdcall symbols in non-MinGW environments, the decorated form is
65044eb2f6SDimitry Andric // fully decorated with leading underscore and trailing stack argument
66044eb2f6SDimitry Andric // size - like "_Func@0".
67044eb2f6SDimitry Andric // - In MinGW def files, a decorated stdcall symbol does not include the
68044eb2f6SDimitry Andric // leading underscore though, like "Func@0".
69044eb2f6SDimitry Andric
70044eb2f6SDimitry Andric // This function controls whether a leading underscore should be added to
71044eb2f6SDimitry Andric // the given symbol name or not. For MinGW, treat a stdcall symbol name such
72044eb2f6SDimitry Andric // as "Func@0" as undecorated, i.e. a leading underscore must be added.
73044eb2f6SDimitry Andric // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
74044eb2f6SDimitry Andric // as decorated, i.e. don't add any more leading underscores.
75044eb2f6SDimitry Andric // We can't check for a leading underscore here, since function names
76044eb2f6SDimitry Andric // themselves can start with an underscore, while a second one still needs
77044eb2f6SDimitry Andric // to be added.
78312c0ed1SDimitry Andric return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
79044eb2f6SDimitry Andric (!MingwDef && Sym.contains('@'));
80b5630dbaSDimitry Andric }
81b5630dbaSDimitry Andric
82b5630dbaSDimitry Andric class Lexer {
83b5630dbaSDimitry Andric public:
Lexer(StringRef S)84b5630dbaSDimitry Andric Lexer(StringRef S) : Buf(S) {}
85b5630dbaSDimitry Andric
lex()86b5630dbaSDimitry Andric Token lex() {
87b5630dbaSDimitry Andric Buf = Buf.trim();
88b5630dbaSDimitry Andric if (Buf.empty())
89b5630dbaSDimitry Andric return Token(Eof);
90b5630dbaSDimitry Andric
91b5630dbaSDimitry Andric switch (Buf[0]) {
92b5630dbaSDimitry Andric case '\0':
93b5630dbaSDimitry Andric return Token(Eof);
94b5630dbaSDimitry Andric case ';': {
95b5630dbaSDimitry Andric size_t End = Buf.find('\n');
96b5630dbaSDimitry Andric Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
97b5630dbaSDimitry Andric return lex();
98b5630dbaSDimitry Andric }
99b5630dbaSDimitry Andric case '=':
100b5630dbaSDimitry Andric Buf = Buf.drop_front();
101ac9a064cSDimitry Andric if (Buf.consume_front("="))
102eb11fae6SDimitry Andric return Token(EqualEqual, "==");
103b5630dbaSDimitry Andric return Token(Equal, "=");
104b5630dbaSDimitry Andric case ',':
105b5630dbaSDimitry Andric Buf = Buf.drop_front();
106b5630dbaSDimitry Andric return Token(Comma, ",");
107b5630dbaSDimitry Andric case '"': {
108b5630dbaSDimitry Andric StringRef S;
109b5630dbaSDimitry Andric std::tie(S, Buf) = Buf.substr(1).split('"');
110b5630dbaSDimitry Andric return Token(Identifier, S);
111b5630dbaSDimitry Andric }
112b5630dbaSDimitry Andric default: {
113044eb2f6SDimitry Andric size_t End = Buf.find_first_of("=,;\r\n \t\v");
114b5630dbaSDimitry Andric StringRef Word = Buf.substr(0, End);
115b5630dbaSDimitry Andric Kind K = llvm::StringSwitch<Kind>(Word)
116b5630dbaSDimitry Andric .Case("BASE", KwBase)
117b5630dbaSDimitry Andric .Case("CONSTANT", KwConstant)
118b5630dbaSDimitry Andric .Case("DATA", KwData)
119b5630dbaSDimitry Andric .Case("EXPORTS", KwExports)
120ac9a064cSDimitry Andric .Case("EXPORTAS", KwExportAs)
121b5630dbaSDimitry Andric .Case("HEAPSIZE", KwHeapsize)
122b5630dbaSDimitry Andric .Case("LIBRARY", KwLibrary)
123b5630dbaSDimitry Andric .Case("NAME", KwName)
124b5630dbaSDimitry Andric .Case("NONAME", KwNoname)
125b5630dbaSDimitry Andric .Case("PRIVATE", KwPrivate)
126b5630dbaSDimitry Andric .Case("STACKSIZE", KwStacksize)
127b5630dbaSDimitry Andric .Case("VERSION", KwVersion)
128b5630dbaSDimitry Andric .Default(Identifier);
129b5630dbaSDimitry Andric Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130b5630dbaSDimitry Andric return Token(K, Word);
131b5630dbaSDimitry Andric }
132b5630dbaSDimitry Andric }
133b5630dbaSDimitry Andric }
134b5630dbaSDimitry Andric
135b5630dbaSDimitry Andric private:
136b5630dbaSDimitry Andric StringRef Buf;
137b5630dbaSDimitry Andric };
138b5630dbaSDimitry Andric
139b5630dbaSDimitry Andric class Parser {
140b5630dbaSDimitry Andric public:
Parser(StringRef S,MachineTypes M,bool B,bool AU)1417fa27ce4SDimitry Andric explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
1427fa27ce4SDimitry Andric : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
1437fa27ce4SDimitry Andric if (Machine != IMAGE_FILE_MACHINE_I386)
1447fa27ce4SDimitry Andric AddUnderscores = false;
1457fa27ce4SDimitry Andric }
146b5630dbaSDimitry Andric
parse()147b5630dbaSDimitry Andric Expected<COFFModuleDefinition> parse() {
148b5630dbaSDimitry Andric do {
149b5630dbaSDimitry Andric if (Error Err = parseOne())
150b5630dbaSDimitry Andric return std::move(Err);
151b5630dbaSDimitry Andric } while (Tok.K != Eof);
152b5630dbaSDimitry Andric return Info;
153b5630dbaSDimitry Andric }
154b5630dbaSDimitry Andric
155b5630dbaSDimitry Andric private:
read()156b5630dbaSDimitry Andric void read() {
157b5630dbaSDimitry Andric if (Stack.empty()) {
158b5630dbaSDimitry Andric Tok = Lex.lex();
159b5630dbaSDimitry Andric return;
160b5630dbaSDimitry Andric }
161b5630dbaSDimitry Andric Tok = Stack.back();
162b5630dbaSDimitry Andric Stack.pop_back();
163b5630dbaSDimitry Andric }
164b5630dbaSDimitry Andric
readAsInt(uint64_t * I)165b5630dbaSDimitry Andric Error readAsInt(uint64_t *I) {
166b5630dbaSDimitry Andric read();
167b5630dbaSDimitry Andric if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168b5630dbaSDimitry Andric return createError("integer expected");
169b5630dbaSDimitry Andric return Error::success();
170b5630dbaSDimitry Andric }
171b5630dbaSDimitry Andric
expect(Kind Expected,StringRef Msg)172b5630dbaSDimitry Andric Error expect(Kind Expected, StringRef Msg) {
173b5630dbaSDimitry Andric read();
174b5630dbaSDimitry Andric if (Tok.K != Expected)
175b5630dbaSDimitry Andric return createError(Msg);
176b5630dbaSDimitry Andric return Error::success();
177b5630dbaSDimitry Andric }
178b5630dbaSDimitry Andric
unget()179b5630dbaSDimitry Andric void unget() { Stack.push_back(Tok); }
180b5630dbaSDimitry Andric
parseOne()181b5630dbaSDimitry Andric Error parseOne() {
182b5630dbaSDimitry Andric read();
183b5630dbaSDimitry Andric switch (Tok.K) {
184b5630dbaSDimitry Andric case Eof:
185b5630dbaSDimitry Andric return Error::success();
186b5630dbaSDimitry Andric case KwExports:
187b5630dbaSDimitry Andric for (;;) {
188b5630dbaSDimitry Andric read();
189b5630dbaSDimitry Andric if (Tok.K != Identifier) {
190b5630dbaSDimitry Andric unget();
191b5630dbaSDimitry Andric return Error::success();
192b5630dbaSDimitry Andric }
193b5630dbaSDimitry Andric if (Error Err = parseExport())
194b5630dbaSDimitry Andric return Err;
195b5630dbaSDimitry Andric }
196b5630dbaSDimitry Andric case KwHeapsize:
197b5630dbaSDimitry Andric return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198b5630dbaSDimitry Andric case KwStacksize:
199b5630dbaSDimitry Andric return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200b5630dbaSDimitry Andric case KwLibrary:
201b5630dbaSDimitry Andric case KwName: {
202b5630dbaSDimitry Andric bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203b5630dbaSDimitry Andric std::string Name;
204b5630dbaSDimitry Andric if (Error Err = parseName(&Name, &Info.ImageBase))
205b5630dbaSDimitry Andric return Err;
20693c91e39SDimitry Andric
20793c91e39SDimitry Andric Info.ImportName = Name;
208b5630dbaSDimitry Andric
209b5630dbaSDimitry Andric // Set the output file, but don't override /out if it was already passed.
21093c91e39SDimitry Andric if (Info.OutputFile.empty()) {
211b5630dbaSDimitry Andric Info.OutputFile = Name;
21293c91e39SDimitry Andric // Append the appropriate file extension if not already present.
21393c91e39SDimitry Andric if (!sys::path::has_extension(Name))
21493c91e39SDimitry Andric Info.OutputFile += IsDll ? ".dll" : ".exe";
21593c91e39SDimitry Andric }
21693c91e39SDimitry Andric
217b5630dbaSDimitry Andric return Error::success();
218b5630dbaSDimitry Andric }
219b5630dbaSDimitry Andric case KwVersion:
220b5630dbaSDimitry Andric return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221b5630dbaSDimitry Andric default:
222b5630dbaSDimitry Andric return createError("unknown directive: " + Tok.Value);
223b5630dbaSDimitry Andric }
224b5630dbaSDimitry Andric }
225b5630dbaSDimitry Andric
parseExport()226b5630dbaSDimitry Andric Error parseExport() {
227b5630dbaSDimitry Andric COFFShortExport E;
228cfca06d7SDimitry Andric E.Name = std::string(Tok.Value);
229b5630dbaSDimitry Andric read();
230b5630dbaSDimitry Andric if (Tok.K == Equal) {
231b5630dbaSDimitry Andric read();
232b5630dbaSDimitry Andric if (Tok.K != Identifier)
233b5630dbaSDimitry Andric return createError("identifier expected, but got " + Tok.Value);
234b5630dbaSDimitry Andric E.ExtName = E.Name;
235cfca06d7SDimitry Andric E.Name = std::string(Tok.Value);
236b5630dbaSDimitry Andric } else {
237b5630dbaSDimitry Andric unget();
238b5630dbaSDimitry Andric }
239b5630dbaSDimitry Andric
2407fa27ce4SDimitry Andric if (AddUnderscores) {
24193c91e39SDimitry Andric if (!isDecorated(E.Name, MingwDef))
242b5630dbaSDimitry Andric E.Name = (std::string("_").append(E.Name));
24393c91e39SDimitry Andric if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
244b5630dbaSDimitry Andric E.ExtName = (std::string("_").append(E.ExtName));
245b5630dbaSDimitry Andric }
246b5630dbaSDimitry Andric
247b5630dbaSDimitry Andric for (;;) {
248b5630dbaSDimitry Andric read();
249b5630dbaSDimitry Andric if (Tok.K == Identifier && Tok.Value[0] == '@') {
250044eb2f6SDimitry Andric if (Tok.Value == "@") {
251044eb2f6SDimitry Andric // "foo @ 10"
252044eb2f6SDimitry Andric read();
253044eb2f6SDimitry Andric Tok.Value.getAsInteger(10, E.Ordinal);
254044eb2f6SDimitry Andric } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
255044eb2f6SDimitry Andric // "foo \n @bar" - Not an ordinal modifier at all, but the next
256044eb2f6SDimitry Andric // export (fastcall decorated) - complete the current one.
2575e529592SDimitry Andric unget();
2585e529592SDimitry Andric Info.Exports.push_back(E);
2595e529592SDimitry Andric return Error::success();
2605e529592SDimitry Andric }
261044eb2f6SDimitry Andric // "foo @10"
262b5630dbaSDimitry Andric read();
263b5630dbaSDimitry Andric if (Tok.K == KwNoname) {
264b5630dbaSDimitry Andric E.Noname = true;
265b5630dbaSDimitry Andric } else {
266b5630dbaSDimitry Andric unget();
267b5630dbaSDimitry Andric }
268b5630dbaSDimitry Andric continue;
269b5630dbaSDimitry Andric }
270b5630dbaSDimitry Andric if (Tok.K == KwData) {
271b5630dbaSDimitry Andric E.Data = true;
272b5630dbaSDimitry Andric continue;
273b5630dbaSDimitry Andric }
274b5630dbaSDimitry Andric if (Tok.K == KwConstant) {
275b5630dbaSDimitry Andric E.Constant = true;
276b5630dbaSDimitry Andric continue;
277b5630dbaSDimitry Andric }
278b5630dbaSDimitry Andric if (Tok.K == KwPrivate) {
279b5630dbaSDimitry Andric E.Private = true;
280b5630dbaSDimitry Andric continue;
281b5630dbaSDimitry Andric }
282eb11fae6SDimitry Andric if (Tok.K == EqualEqual) {
283eb11fae6SDimitry Andric read();
284ac9a064cSDimitry Andric E.ImportName = std::string(Tok.Value);
285eb11fae6SDimitry Andric continue;
286eb11fae6SDimitry Andric }
287ac9a064cSDimitry Andric // EXPORTAS must be at the end of export definition
288ac9a064cSDimitry Andric if (Tok.K == KwExportAs) {
289ac9a064cSDimitry Andric read();
290ac9a064cSDimitry Andric if (Tok.K == Eof)
291ac9a064cSDimitry Andric return createError(
292ac9a064cSDimitry Andric "unexpected end of file, EXPORTAS identifier expected");
293ac9a064cSDimitry Andric E.ExportAs = std::string(Tok.Value);
294ac9a064cSDimitry Andric } else {
295b5630dbaSDimitry Andric unget();
296ac9a064cSDimitry Andric }
297b5630dbaSDimitry Andric Info.Exports.push_back(E);
298b5630dbaSDimitry Andric return Error::success();
299b5630dbaSDimitry Andric }
300b5630dbaSDimitry Andric }
301b5630dbaSDimitry Andric
302b5630dbaSDimitry Andric // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)303b5630dbaSDimitry Andric Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
304b5630dbaSDimitry Andric if (Error Err = readAsInt(Reserve))
305b5630dbaSDimitry Andric return Err;
306b5630dbaSDimitry Andric read();
307b5630dbaSDimitry Andric if (Tok.K != Comma) {
308b5630dbaSDimitry Andric unget();
309b5630dbaSDimitry Andric Commit = nullptr;
310b5630dbaSDimitry Andric return Error::success();
311b5630dbaSDimitry Andric }
312b5630dbaSDimitry Andric if (Error Err = readAsInt(Commit))
313b5630dbaSDimitry Andric return Err;
314b5630dbaSDimitry Andric return Error::success();
315b5630dbaSDimitry Andric }
316b5630dbaSDimitry Andric
317b5630dbaSDimitry Andric // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)318b5630dbaSDimitry Andric Error parseName(std::string *Out, uint64_t *Baseaddr) {
319b5630dbaSDimitry Andric read();
320b5630dbaSDimitry Andric if (Tok.K == Identifier) {
321cfca06d7SDimitry Andric *Out = std::string(Tok.Value);
322b5630dbaSDimitry Andric } else {
323b5630dbaSDimitry Andric *Out = "";
324b5630dbaSDimitry Andric unget();
325b5630dbaSDimitry Andric return Error::success();
326b5630dbaSDimitry Andric }
327b5630dbaSDimitry Andric read();
328b5630dbaSDimitry Andric if (Tok.K == KwBase) {
329b5630dbaSDimitry Andric if (Error Err = expect(Equal, "'=' expected"))
330b5630dbaSDimitry Andric return Err;
331b5630dbaSDimitry Andric if (Error Err = readAsInt(Baseaddr))
332b5630dbaSDimitry Andric return Err;
333b5630dbaSDimitry Andric } else {
334b5630dbaSDimitry Andric unget();
335b5630dbaSDimitry Andric *Baseaddr = 0;
336b5630dbaSDimitry Andric }
337b5630dbaSDimitry Andric return Error::success();
338b5630dbaSDimitry Andric }
339b5630dbaSDimitry Andric
340b5630dbaSDimitry Andric // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)341b5630dbaSDimitry Andric Error parseVersion(uint32_t *Major, uint32_t *Minor) {
342b5630dbaSDimitry Andric read();
343b5630dbaSDimitry Andric if (Tok.K != Identifier)
344b5630dbaSDimitry Andric return createError("identifier expected, but got " + Tok.Value);
345b5630dbaSDimitry Andric StringRef V1, V2;
346b5630dbaSDimitry Andric std::tie(V1, V2) = Tok.Value.split('.');
347b5630dbaSDimitry Andric if (V1.getAsInteger(10, *Major))
348b5630dbaSDimitry Andric return createError("integer expected, but got " + Tok.Value);
349b5630dbaSDimitry Andric if (V2.empty())
350b5630dbaSDimitry Andric *Minor = 0;
351b5630dbaSDimitry Andric else if (V2.getAsInteger(10, *Minor))
352b5630dbaSDimitry Andric return createError("integer expected, but got " + Tok.Value);
353b5630dbaSDimitry Andric return Error::success();
354b5630dbaSDimitry Andric }
355b5630dbaSDimitry Andric
356b5630dbaSDimitry Andric Lexer Lex;
357b5630dbaSDimitry Andric Token Tok;
358b5630dbaSDimitry Andric std::vector<Token> Stack;
359b5630dbaSDimitry Andric MachineTypes Machine;
360b5630dbaSDimitry Andric COFFModuleDefinition Info;
36193c91e39SDimitry Andric bool MingwDef;
3627fa27ce4SDimitry Andric bool AddUnderscores;
363b5630dbaSDimitry Andric };
364b5630dbaSDimitry Andric
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef,bool AddUnderscores)365b5630dbaSDimitry Andric Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
36693c91e39SDimitry Andric MachineTypes Machine,
3677fa27ce4SDimitry Andric bool MingwDef,
3687fa27ce4SDimitry Andric bool AddUnderscores) {
3697fa27ce4SDimitry Andric return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
370b5630dbaSDimitry Andric }
371b5630dbaSDimitry Andric
372b5630dbaSDimitry Andric } // namespace object
373b5630dbaSDimitry Andric } // namespace llvm
374