//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents a symbol table built from in-memory IR. It provides
// access to GlobalValues and should only be used if such access is required
// (e.g. in the LTO implementation).
//
//===----------------------------------------------------------------------===//
14b915e9e0SDimitry Andric
157ab83427SDimitry Andric #include "llvm/Object/ModuleSymbolTable.h"
16b915e9e0SDimitry Andric #include "RecordStreamer.h"
17b915e9e0SDimitry Andric #include "llvm/ADT/STLExtras.h"
1812f3ca4cSDimitry Andric #include "llvm/ADT/StringRef.h"
1999aabd70SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
2012f3ca4cSDimitry Andric #include "llvm/IR/Function.h"
2112f3ca4cSDimitry Andric #include "llvm/IR/GlobalAlias.h"
2212f3ca4cSDimitry Andric #include "llvm/IR/GlobalValue.h"
2312f3ca4cSDimitry Andric #include "llvm/IR/GlobalVariable.h"
24cfca06d7SDimitry Andric #include "llvm/IR/InlineAsm.h"
25b915e9e0SDimitry Andric #include "llvm/IR/Module.h"
26b915e9e0SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
27b915e9e0SDimitry Andric #include "llvm/MC/MCContext.h"
28b915e9e0SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
29b915e9e0SDimitry Andric #include "llvm/MC/MCObjectFileInfo.h"
30b915e9e0SDimitry Andric #include "llvm/MC/MCParser/MCAsmParser.h"
31b915e9e0SDimitry Andric #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32b915e9e0SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
33b915e9e0SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
3412f3ca4cSDimitry Andric #include "llvm/MC/MCSymbol.h"
3512f3ca4cSDimitry Andric #include "llvm/MC/MCTargetOptions.h"
36c0981da4SDimitry Andric #include "llvm/MC/TargetRegistry.h"
3712f3ca4cSDimitry Andric #include "llvm/Object/SymbolicFile.h"
3812f3ca4cSDimitry Andric #include "llvm/Support/Casting.h"
3912f3ca4cSDimitry Andric #include "llvm/Support/ErrorHandling.h"
40b915e9e0SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
4112f3ca4cSDimitry Andric #include "llvm/Support/SMLoc.h"
42b915e9e0SDimitry Andric #include "llvm/Support/SourceMgr.h"
437ab83427SDimitry Andric #include "llvm/Support/raw_ostream.h"
447fa27ce4SDimitry Andric #include "llvm/TargetParser/Triple.h"
4512f3ca4cSDimitry Andric #include <algorithm>
4612f3ca4cSDimitry Andric #include <cassert>
4712f3ca4cSDimitry Andric #include <cstdint>
4812f3ca4cSDimitry Andric #include <memory>
4912f3ca4cSDimitry Andric #include <string>
5012f3ca4cSDimitry Andric
51b915e9e0SDimitry Andric using namespace llvm;
52b915e9e0SDimitry Andric using namespace object;
53b915e9e0SDimitry Andric
addModule(Module * M)54b915e9e0SDimitry Andric void ModuleSymbolTable::addModule(Module *M) {
55b915e9e0SDimitry Andric if (FirstMod)
56b915e9e0SDimitry Andric assert(FirstMod->getTargetTriple() == M->getTargetTriple());
57b915e9e0SDimitry Andric else
58b915e9e0SDimitry Andric FirstMod = M;
59b915e9e0SDimitry Andric
6071d5a254SDimitry Andric for (GlobalValue &GV : M->global_values())
61b915e9e0SDimitry Andric SymTab.push_back(&GV);
62b915e9e0SDimitry Andric
6371d5a254SDimitry Andric CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) {
64cfca06d7SDimitry Andric SymTab.push_back(new (AsmSymbols.Allocate())
65cfca06d7SDimitry Andric AsmSymbol(std::string(Name), Flags));
66b915e9e0SDimitry Andric });
67b915e9e0SDimitry Andric }
68b915e9e0SDimitry Andric
69eb11fae6SDimitry Andric static void
initializeRecordStreamer(const Module & M,function_ref<void (RecordStreamer &)> Init)70eb11fae6SDimitry Andric initializeRecordStreamer(const Module &M,
71eb11fae6SDimitry Andric function_ref<void(RecordStreamer &)> Init) {
7299aabd70SDimitry Andric // This function may be called twice, once for ModuleSummaryIndexAnalysis and
7399aabd70SDimitry Andric // the other when writing the IR symbol table. If parsing inline assembly has
7499aabd70SDimitry Andric // caused errors in the first run, suppress the second run.
7599aabd70SDimitry Andric if (M.getContext().getDiagHandlerPtr()->HasErrors)
7699aabd70SDimitry Andric return;
7771d5a254SDimitry Andric StringRef InlineAsm = M.getModuleInlineAsm();
78b915e9e0SDimitry Andric if (InlineAsm.empty())
79b915e9e0SDimitry Andric return;
80b915e9e0SDimitry Andric
81b915e9e0SDimitry Andric std::string Err;
8271d5a254SDimitry Andric const Triple TT(M.getTargetTriple());
83b915e9e0SDimitry Andric const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
84b915e9e0SDimitry Andric assert(T && T->hasMCAsmParser());
85b915e9e0SDimitry Andric
86b915e9e0SDimitry Andric std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
87b915e9e0SDimitry Andric if (!MRI)
88b915e9e0SDimitry Andric return;
89b915e9e0SDimitry Andric
90706b4fc4SDimitry Andric MCTargetOptions MCOptions;
91706b4fc4SDimitry Andric std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str(), MCOptions));
92b915e9e0SDimitry Andric if (!MAI)
93b915e9e0SDimitry Andric return;
94b915e9e0SDimitry Andric
95b915e9e0SDimitry Andric std::unique_ptr<MCSubtargetInfo> STI(
96b915e9e0SDimitry Andric T->createMCSubtargetInfo(TT.str(), "", ""));
97b915e9e0SDimitry Andric if (!STI)
98b915e9e0SDimitry Andric return;
99b915e9e0SDimitry Andric
100b915e9e0SDimitry Andric std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
101b915e9e0SDimitry Andric if (!MCII)
102b915e9e0SDimitry Andric return;
103b915e9e0SDimitry Andric
10499aabd70SDimitry Andric std::unique_ptr<MemoryBuffer> Buffer(
10599aabd70SDimitry Andric MemoryBuffer::getMemBuffer(InlineAsm, "<inline asm>"));
106b915e9e0SDimitry Andric SourceMgr SrcMgr;
107b915e9e0SDimitry Andric SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
108344a3780SDimitry Andric
109344a3780SDimitry Andric MCContext MCCtx(TT, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
110344a3780SDimitry Andric std::unique_ptr<MCObjectFileInfo> MOFI(
111344a3780SDimitry Andric T->createMCObjectFileInfo(MCCtx, /*PIC=*/false));
112344a3780SDimitry Andric MOFI->setSDKVersion(M.getSDKVersion());
113344a3780SDimitry Andric MCCtx.setObjectFileInfo(MOFI.get());
114344a3780SDimitry Andric RecordStreamer Streamer(MCCtx, M);
115344a3780SDimitry Andric T->createNullTargetStreamer(Streamer);
116344a3780SDimitry Andric
117b915e9e0SDimitry Andric std::unique_ptr<MCAsmParser> Parser(
118b915e9e0SDimitry Andric createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI));
119b915e9e0SDimitry Andric
120b915e9e0SDimitry Andric std::unique_ptr<MCTargetAsmParser> TAP(
121b915e9e0SDimitry Andric T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
122b915e9e0SDimitry Andric if (!TAP)
123b915e9e0SDimitry Andric return;
124b915e9e0SDimitry Andric
12599aabd70SDimitry Andric MCCtx.setDiagnosticHandler([&](const SMDiagnostic &SMD, bool IsInlineAsm,
12699aabd70SDimitry Andric const SourceMgr &SrcMgr,
12799aabd70SDimitry Andric std::vector<const MDNode *> &LocInfos) {
12899aabd70SDimitry Andric M.getContext().diagnose(
12999aabd70SDimitry Andric DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0));
13099aabd70SDimitry Andric });
13199aabd70SDimitry Andric
132cfca06d7SDimitry Andric // Module-level inline asm is assumed to use At&t syntax (see
133cfca06d7SDimitry Andric // AsmPrinter::doInitialization()).
134cfca06d7SDimitry Andric Parser->setAssemblerDialect(InlineAsm::AD_ATT);
135cfca06d7SDimitry Andric
136b915e9e0SDimitry Andric Parser->setTargetParser(*TAP);
137b915e9e0SDimitry Andric if (Parser->Run(false))
138b915e9e0SDimitry Andric return;
139b915e9e0SDimitry Andric
140eb11fae6SDimitry Andric Init(Streamer);
141eb11fae6SDimitry Andric }
142eb11fae6SDimitry Andric
CollectAsmSymbols(const Module & M,function_ref<void (StringRef,BasicSymbolRef::Flags)> AsmSymbol)143eb11fae6SDimitry Andric void ModuleSymbolTable::CollectAsmSymbols(
144eb11fae6SDimitry Andric const Module &M,
145eb11fae6SDimitry Andric function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
146eb11fae6SDimitry Andric initializeRecordStreamer(M, [&](RecordStreamer &Streamer) {
147eb11fae6SDimitry Andric Streamer.flushSymverDirectives();
14871d5a254SDimitry Andric
149b915e9e0SDimitry Andric for (auto &KV : Streamer) {
150b915e9e0SDimitry Andric StringRef Key = KV.first();
151b915e9e0SDimitry Andric RecordStreamer::State Value = KV.second;
152b915e9e0SDimitry Andric // FIXME: For now we just assume that all asm symbols are executable.
153b915e9e0SDimitry Andric uint32_t Res = BasicSymbolRef::SF_Executable;
154b915e9e0SDimitry Andric switch (Value) {
155b915e9e0SDimitry Andric case RecordStreamer::NeverSeen:
156b915e9e0SDimitry Andric llvm_unreachable("NeverSeen should have been replaced earlier");
157b915e9e0SDimitry Andric case RecordStreamer::DefinedGlobal:
158b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Global;
159b915e9e0SDimitry Andric break;
160b915e9e0SDimitry Andric case RecordStreamer::Defined:
161b915e9e0SDimitry Andric break;
162b915e9e0SDimitry Andric case RecordStreamer::Global:
163b915e9e0SDimitry Andric case RecordStreamer::Used:
164b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Undefined;
165b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Global;
166b915e9e0SDimitry Andric break;
167b915e9e0SDimitry Andric case RecordStreamer::DefinedWeak:
168b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Weak;
169b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Global;
170b915e9e0SDimitry Andric break;
171b915e9e0SDimitry Andric case RecordStreamer::UndefinedWeak:
172b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Weak;
173b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Undefined;
174b915e9e0SDimitry Andric }
175b915e9e0SDimitry Andric AsmSymbol(Key, BasicSymbolRef::Flags(Res));
176b915e9e0SDimitry Andric }
177eb11fae6SDimitry Andric });
178ac9a064cSDimitry Andric
179ac9a064cSDimitry Andric // In ELF, object code generated for x86-32 and some code models of x86-64 may
180ac9a064cSDimitry Andric // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the
181ac9a064cSDimitry Andric // IR. Record it like inline asm symbols.
182ac9a064cSDimitry Andric Triple TT(M.getTargetTriple());
183ac9a064cSDimitry Andric if (!TT.isOSBinFormatELF() || !TT.isX86())
184ac9a064cSDimitry Andric return;
185ac9a064cSDimitry Andric auto CM = M.getCodeModel();
186ac9a064cSDimitry Andric if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium ||
187ac9a064cSDimitry Andric CM == CodeModel::Large) {
188ac9a064cSDimitry Andric AsmSymbol("_GLOBAL_OFFSET_TABLE_",
189ac9a064cSDimitry Andric BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined |
190ac9a064cSDimitry Andric BasicSymbolRef::SF_Global));
191ac9a064cSDimitry Andric }
192eb11fae6SDimitry Andric }
193eb11fae6SDimitry Andric
CollectAsmSymvers(const Module & M,function_ref<void (StringRef,StringRef)> AsmSymver)194eb11fae6SDimitry Andric void ModuleSymbolTable::CollectAsmSymvers(
195eb11fae6SDimitry Andric const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) {
196eb11fae6SDimitry Andric initializeRecordStreamer(M, [&](RecordStreamer &Streamer) {
197eb11fae6SDimitry Andric for (auto &KV : Streamer.symverAliases())
198eb11fae6SDimitry Andric for (auto &Alias : KV.second)
199eb11fae6SDimitry Andric AsmSymver(KV.first->getName(), Alias);
200eb11fae6SDimitry Andric });
201b915e9e0SDimitry Andric }
202b915e9e0SDimitry Andric
printSymbolName(raw_ostream & OS,Symbol S) const203b915e9e0SDimitry Andric void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const {
2047fa27ce4SDimitry Andric if (isa<AsmSymbol *>(S)) {
2057fa27ce4SDimitry Andric OS << cast<AsmSymbol *>(S)->first;
206b915e9e0SDimitry Andric return;
207b915e9e0SDimitry Andric }
208b915e9e0SDimitry Andric
2097fa27ce4SDimitry Andric auto *GV = cast<GlobalValue *>(S);
210b915e9e0SDimitry Andric if (GV->hasDLLImportStorageClass())
211b915e9e0SDimitry Andric OS << "__imp_";
212b915e9e0SDimitry Andric
213b915e9e0SDimitry Andric Mang.getNameWithPrefix(OS, GV, false);
214b915e9e0SDimitry Andric }
215b915e9e0SDimitry Andric
getSymbolFlags(Symbol S) const216b915e9e0SDimitry Andric uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
2177fa27ce4SDimitry Andric if (isa<AsmSymbol *>(S))
2187fa27ce4SDimitry Andric return cast<AsmSymbol *>(S)->second;
219b915e9e0SDimitry Andric
2207fa27ce4SDimitry Andric auto *GV = cast<GlobalValue *>(S);
221b915e9e0SDimitry Andric
222b915e9e0SDimitry Andric uint32_t Res = BasicSymbolRef::SF_None;
223b915e9e0SDimitry Andric if (GV->isDeclarationForLinker())
224b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Undefined;
225b915e9e0SDimitry Andric else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
226b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Hidden;
227b915e9e0SDimitry Andric if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
228b915e9e0SDimitry Andric if (GVar->isConstant())
229b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Const;
230b915e9e0SDimitry Andric }
231c0981da4SDimitry Andric if (const GlobalObject *GO = GV->getAliaseeObject())
232c0981da4SDimitry Andric if (isa<Function>(GO) || isa<GlobalIFunc>(GO))
233b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Executable;
234b915e9e0SDimitry Andric if (isa<GlobalAlias>(GV))
235b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Indirect;
236b915e9e0SDimitry Andric if (GV->hasPrivateLinkage())
237b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific;
238b915e9e0SDimitry Andric if (!GV->hasLocalLinkage())
239b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Global;
240b915e9e0SDimitry Andric if (GV->hasCommonLinkage())
241b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Common;
242b915e9e0SDimitry Andric if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
243b915e9e0SDimitry Andric GV->hasExternalWeakLinkage())
244b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_Weak;
245b915e9e0SDimitry Andric
246312c0ed1SDimitry Andric if (GV->getName().starts_with("llvm."))
247b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific;
248b915e9e0SDimitry Andric else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
249b915e9e0SDimitry Andric if (Var->getSection() == "llvm.metadata")
250b915e9e0SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific;
251b915e9e0SDimitry Andric }
252b915e9e0SDimitry Andric
253b915e9e0SDimitry Andric return Res;
254b915e9e0SDimitry Andric }
255