xref: /src/contrib/llvm-project/lldb/source/Plugins/SymbolFile/NativePDB/CompileUnitIndex.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad) !
1cfca06d7SDimitry Andric //===-- CompileUnitIndex.cpp ----------------------------------------------===//
294994d37SDimitry Andric //
35f29bb8aSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45f29bb8aSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55f29bb8aSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
694994d37SDimitry Andric //
794994d37SDimitry Andric //===----------------------------------------------------------------------===//
894994d37SDimitry Andric 
994994d37SDimitry Andric #include "CompileUnitIndex.h"
1094994d37SDimitry Andric 
1194994d37SDimitry Andric #include "PdbIndex.h"
1294994d37SDimitry Andric #include "PdbUtil.h"
1394994d37SDimitry Andric 
1494994d37SDimitry Andric #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
1594994d37SDimitry Andric #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
1694994d37SDimitry Andric #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
1794994d37SDimitry Andric #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
1894994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
1994994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
2094994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
2194994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
2294994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
2394994d37SDimitry Andric #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
2494994d37SDimitry Andric #include "llvm/Support/Path.h"
2594994d37SDimitry Andric 
2694994d37SDimitry Andric #include "lldb/Utility/LLDBAssert.h"
2794994d37SDimitry Andric 
2894994d37SDimitry Andric using namespace lldb;
2994994d37SDimitry Andric using namespace lldb_private;
3094994d37SDimitry Andric using namespace lldb_private::npdb;
3194994d37SDimitry Andric using namespace llvm::codeview;
3294994d37SDimitry Andric using namespace llvm::pdb;
3394994d37SDimitry Andric 
IsMainFile(llvm::StringRef main,llvm::StringRef other)3494994d37SDimitry Andric static bool IsMainFile(llvm::StringRef main, llvm::StringRef other) {
3594994d37SDimitry Andric   if (main == other)
3694994d37SDimitry Andric     return true;
3794994d37SDimitry Andric 
3894994d37SDimitry Andric   // If the files refer to the local file system, we can just ask the file
3994994d37SDimitry Andric   // system if they're equivalent.  But if the source isn't present on disk
4094994d37SDimitry Andric   // then we still want to try.
4194994d37SDimitry Andric   if (llvm::sys::fs::equivalent(main, other))
4294994d37SDimitry Andric     return true;
4394994d37SDimitry Andric 
4494994d37SDimitry Andric   llvm::SmallString<64> normalized(other);
4594994d37SDimitry Andric   llvm::sys::path::native(normalized);
46344a3780SDimitry Andric   return main.equals_insensitive(normalized);
4794994d37SDimitry Andric }
4894994d37SDimitry Andric 
ParseCompile3(const CVSymbol & sym,CompilandIndexItem & cci)4994994d37SDimitry Andric static void ParseCompile3(const CVSymbol &sym, CompilandIndexItem &cci) {
5094994d37SDimitry Andric   cci.m_compile_opts.emplace();
5194994d37SDimitry Andric   llvm::cantFail(
5294994d37SDimitry Andric       SymbolDeserializer::deserializeAs<Compile3Sym>(sym, *cci.m_compile_opts));
5394994d37SDimitry Andric }
5494994d37SDimitry Andric 
ParseObjname(const CVSymbol & sym,CompilandIndexItem & cci)5594994d37SDimitry Andric static void ParseObjname(const CVSymbol &sym, CompilandIndexItem &cci) {
5694994d37SDimitry Andric   cci.m_obj_name.emplace();
5794994d37SDimitry Andric   llvm::cantFail(
5894994d37SDimitry Andric       SymbolDeserializer::deserializeAs<ObjNameSym>(sym, *cci.m_obj_name));
5994994d37SDimitry Andric }
6094994d37SDimitry Andric 
ParseBuildInfo(PdbIndex & index,const CVSymbol & sym,CompilandIndexItem & cci)6194994d37SDimitry Andric static void ParseBuildInfo(PdbIndex &index, const CVSymbol &sym,
6294994d37SDimitry Andric                            CompilandIndexItem &cci) {
6394994d37SDimitry Andric   BuildInfoSym bis(SymbolRecordKind::BuildInfoSym);
6494994d37SDimitry Andric   llvm::cantFail(SymbolDeserializer::deserializeAs<BuildInfoSym>(sym, bis));
6594994d37SDimitry Andric 
6694994d37SDimitry Andric   // S_BUILDINFO just points to an LF_BUILDINFO in the IPI stream.  Let's do
6794994d37SDimitry Andric   // a little extra work to pull out the LF_BUILDINFO.
6894994d37SDimitry Andric   LazyRandomTypeCollection &types = index.ipi().typeCollection();
69e3b55780SDimitry Andric   std::optional<CVType> cvt = types.tryGetType(bis.BuildId);
7094994d37SDimitry Andric 
7194994d37SDimitry Andric   if (!cvt || cvt->kind() != LF_BUILDINFO)
7294994d37SDimitry Andric     return;
7394994d37SDimitry Andric 
7494994d37SDimitry Andric   BuildInfoRecord bir;
7594994d37SDimitry Andric   llvm::cantFail(TypeDeserializer::deserializeAs<BuildInfoRecord>(*cvt, bir));
7694994d37SDimitry Andric   cci.m_build_info.assign(bir.ArgIndices.begin(), bir.ArgIndices.end());
7794994d37SDimitry Andric }
7894994d37SDimitry Andric 
ParseExtendedInfo(PdbIndex & index,CompilandIndexItem & item)7994994d37SDimitry Andric static void ParseExtendedInfo(PdbIndex &index, CompilandIndexItem &item) {
8094994d37SDimitry Andric   const CVSymbolArray &syms = item.m_debug_stream.getSymbolArray();
8194994d37SDimitry Andric 
8294994d37SDimitry Andric   // This is a private function, it shouldn't be called if the information
8394994d37SDimitry Andric   // has already been parsed.
8494994d37SDimitry Andric   lldbassert(!item.m_obj_name);
8594994d37SDimitry Andric   lldbassert(!item.m_compile_opts);
8694994d37SDimitry Andric   lldbassert(item.m_build_info.empty());
8794994d37SDimitry Andric 
8894994d37SDimitry Andric   // We're looking for 3 things.  S_COMPILE3, S_OBJNAME, and S_BUILDINFO.
8994994d37SDimitry Andric   int found = 0;
9094994d37SDimitry Andric   for (const CVSymbol &sym : syms) {
9194994d37SDimitry Andric     switch (sym.kind()) {
9294994d37SDimitry Andric     case S_COMPILE3:
9394994d37SDimitry Andric       ParseCompile3(sym, item);
9494994d37SDimitry Andric       break;
9594994d37SDimitry Andric     case S_OBJNAME:
9694994d37SDimitry Andric       ParseObjname(sym, item);
9794994d37SDimitry Andric       break;
9894994d37SDimitry Andric     case S_BUILDINFO:
9994994d37SDimitry Andric       ParseBuildInfo(index, sym, item);
10094994d37SDimitry Andric       break;
10194994d37SDimitry Andric     default:
10294994d37SDimitry Andric       continue;
10394994d37SDimitry Andric     }
10494994d37SDimitry Andric     if (++found >= 3)
10594994d37SDimitry Andric       break;
10694994d37SDimitry Andric   }
10794994d37SDimitry Andric }
10894994d37SDimitry Andric 
ParseInlineeLineTableForCompileUnit(CompilandIndexItem & item)1096f8fc217SDimitry Andric static void ParseInlineeLineTableForCompileUnit(CompilandIndexItem &item) {
1106f8fc217SDimitry Andric   for (const auto &ss : item.m_debug_stream.getSubsectionsArray()) {
1116f8fc217SDimitry Andric     if (ss.kind() != DebugSubsectionKind::InlineeLines)
1126f8fc217SDimitry Andric       continue;
1136f8fc217SDimitry Andric 
1146f8fc217SDimitry Andric     DebugInlineeLinesSubsectionRef inlinee_lines;
1156f8fc217SDimitry Andric     llvm::BinaryStreamReader reader(ss.getRecordData());
1166f8fc217SDimitry Andric     if (llvm::Error error = inlinee_lines.initialize(reader)) {
1176f8fc217SDimitry Andric       consumeError(std::move(error));
1186f8fc217SDimitry Andric       continue;
1196f8fc217SDimitry Andric     }
1206f8fc217SDimitry Andric 
1216f8fc217SDimitry Andric     for (const InlineeSourceLine &Line : inlinee_lines) {
1226f8fc217SDimitry Andric       item.m_inline_map[Line.Header->Inlinee] = Line;
1236f8fc217SDimitry Andric     }
1246f8fc217SDimitry Andric   }
1256f8fc217SDimitry Andric }
1266f8fc217SDimitry Andric 
CompilandIndexItem(PdbCompilandId id,llvm::pdb::ModuleDebugStreamRef debug_stream,llvm::pdb::DbiModuleDescriptor descriptor)12794994d37SDimitry Andric CompilandIndexItem::CompilandIndexItem(
12894994d37SDimitry Andric     PdbCompilandId id, llvm::pdb::ModuleDebugStreamRef debug_stream,
12994994d37SDimitry Andric     llvm::pdb::DbiModuleDescriptor descriptor)
13094994d37SDimitry Andric     : m_id(id), m_debug_stream(std::move(debug_stream)),
13194994d37SDimitry Andric       m_module_descriptor(std::move(descriptor)) {}
13294994d37SDimitry Andric 
GetOrCreateCompiland(uint16_t modi)13394994d37SDimitry Andric CompilandIndexItem &CompileUnitIndex::GetOrCreateCompiland(uint16_t modi) {
13494994d37SDimitry Andric   auto result = m_comp_units.try_emplace(modi, nullptr);
13594994d37SDimitry Andric   if (!result.second)
13694994d37SDimitry Andric     return *result.first->second;
13794994d37SDimitry Andric 
13894994d37SDimitry Andric   // Find the module list and load its debug information stream and cache it
13994994d37SDimitry Andric   // since we need to use it for almost all interesting operations.
14094994d37SDimitry Andric   const DbiModuleList &modules = m_index.dbi().modules();
14194994d37SDimitry Andric   llvm::pdb::DbiModuleDescriptor descriptor = modules.getModuleDescriptor(modi);
14294994d37SDimitry Andric   uint16_t stream = descriptor.getModuleStreamIndex();
14394994d37SDimitry Andric   std::unique_ptr<llvm::msf::MappedBlockStream> stream_data =
14494994d37SDimitry Andric       m_index.pdb().createIndexedStream(stream);
1455f29bb8aSDimitry Andric 
14694994d37SDimitry Andric 
14794994d37SDimitry Andric   std::unique_ptr<CompilandIndexItem>& cci = result.first->second;
14894994d37SDimitry Andric 
1495f29bb8aSDimitry Andric   if (!stream_data) {
1505f29bb8aSDimitry Andric     llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor, nullptr);
151ead24645SDimitry Andric     cci = std::make_unique<CompilandIndexItem>(PdbCompilandId{ modi }, debug_stream, std::move(descriptor));
1525f29bb8aSDimitry Andric     return *cci;
1535f29bb8aSDimitry Andric   }
1545f29bb8aSDimitry Andric 
1555f29bb8aSDimitry Andric   llvm::pdb::ModuleDebugStreamRef debug_stream(descriptor,
1565f29bb8aSDimitry Andric                                                std::move(stream_data));
1575f29bb8aSDimitry Andric 
1585f29bb8aSDimitry Andric   cantFail(debug_stream.reload());
1595f29bb8aSDimitry Andric 
160ead24645SDimitry Andric   cci = std::make_unique<CompilandIndexItem>(
16194994d37SDimitry Andric       PdbCompilandId{modi}, std::move(debug_stream), std::move(descriptor));
16294994d37SDimitry Andric   ParseExtendedInfo(m_index, *cci);
1636f8fc217SDimitry Andric   ParseInlineeLineTableForCompileUnit(*cci);
16494994d37SDimitry Andric 
1657fa27ce4SDimitry Andric   auto strings = m_index.pdb().getStringTable();
1667fa27ce4SDimitry Andric   if (strings) {
167145449b1SDimitry Andric     cci->m_strings.initialize(cci->m_debug_stream.getSubsectionsArray());
1687fa27ce4SDimitry Andric     cci->m_strings.setStrings(strings->getStringTable());
1697fa27ce4SDimitry Andric   } else {
1707fa27ce4SDimitry Andric     consumeError(strings.takeError());
1717fa27ce4SDimitry Andric   }
17294994d37SDimitry Andric 
17394994d37SDimitry Andric   // We want the main source file to always comes first.  Note that we can't
17494994d37SDimitry Andric   // just push_back the main file onto the front because `GetMainSourceFile`
17594994d37SDimitry Andric   // computes it in such a way that it doesn't own the resulting memory.  So we
17694994d37SDimitry Andric   // have to iterate the module file list comparing each one to the main file
17794994d37SDimitry Andric   // name until we find it, and we can cache that one since the memory is backed
17894994d37SDimitry Andric   // by a contiguous chunk inside the mapped PDB.
17994994d37SDimitry Andric   llvm::SmallString<64> main_file = GetMainSourceFile(*cci);
180cfca06d7SDimitry Andric   std::string s = std::string(main_file.str());
18194994d37SDimitry Andric   llvm::sys::path::native(main_file);
18294994d37SDimitry Andric 
18394994d37SDimitry Andric   uint32_t file_count = modules.getSourceFileCount(modi);
18494994d37SDimitry Andric   cci->m_file_list.reserve(file_count);
18594994d37SDimitry Andric   bool found_main_file = false;
18694994d37SDimitry Andric   for (llvm::StringRef file : modules.source_files(modi)) {
18794994d37SDimitry Andric     if (!found_main_file && IsMainFile(main_file, file)) {
18894994d37SDimitry Andric       cci->m_file_list.insert(cci->m_file_list.begin(), file);
18994994d37SDimitry Andric       found_main_file = true;
19094994d37SDimitry Andric       continue;
19194994d37SDimitry Andric     }
19294994d37SDimitry Andric     cci->m_file_list.push_back(file);
19394994d37SDimitry Andric   }
19494994d37SDimitry Andric 
19594994d37SDimitry Andric   return *cci;
19694994d37SDimitry Andric }
19794994d37SDimitry Andric 
GetCompiland(uint16_t modi) const19894994d37SDimitry Andric const CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) const {
19994994d37SDimitry Andric   auto iter = m_comp_units.find(modi);
20094994d37SDimitry Andric   if (iter == m_comp_units.end())
20194994d37SDimitry Andric     return nullptr;
20294994d37SDimitry Andric   return iter->second.get();
20394994d37SDimitry Andric }
20494994d37SDimitry Andric 
GetCompiland(uint16_t modi)20594994d37SDimitry Andric CompilandIndexItem *CompileUnitIndex::GetCompiland(uint16_t modi) {
20694994d37SDimitry Andric   auto iter = m_comp_units.find(modi);
20794994d37SDimitry Andric   if (iter == m_comp_units.end())
20894994d37SDimitry Andric     return nullptr;
20994994d37SDimitry Andric   return iter->second.get();
21094994d37SDimitry Andric }
21194994d37SDimitry Andric 
21294994d37SDimitry Andric llvm::SmallString<64>
GetMainSourceFile(const CompilandIndexItem & item) const21394994d37SDimitry Andric CompileUnitIndex::GetMainSourceFile(const CompilandIndexItem &item) const {
21494994d37SDimitry Andric   // LF_BUILDINFO contains a list of arg indices which point to LF_STRING_ID
21594994d37SDimitry Andric   // records in the IPI stream.  The order of the arg indices is as follows:
21694994d37SDimitry Andric   // [0] - working directory where compiler was invoked.
21794994d37SDimitry Andric   // [1] - absolute path to compiler binary
21894994d37SDimitry Andric   // [2] - source file name
21994994d37SDimitry Andric   // [3] - path to compiler generated PDB (the /Zi PDB, although this entry gets
22094994d37SDimitry Andric   //       added even when using /Z7)
22194994d37SDimitry Andric   // [4] - full command line invocation.
22294994d37SDimitry Andric   //
22394994d37SDimitry Andric   // We need to form the path [0]\[2] to generate the full path to the main
22494994d37SDimitry Andric   // file.source
22594994d37SDimitry Andric   if (item.m_build_info.size() < 3)
22694994d37SDimitry Andric     return {""};
22794994d37SDimitry Andric 
22894994d37SDimitry Andric   LazyRandomTypeCollection &types = m_index.ipi().typeCollection();
22994994d37SDimitry Andric 
23094994d37SDimitry Andric   StringIdRecord working_dir;
23194994d37SDimitry Andric   StringIdRecord file_name;
23294994d37SDimitry Andric   CVType dir_cvt = types.getType(item.m_build_info[0]);
23394994d37SDimitry Andric   CVType file_cvt = types.getType(item.m_build_info[2]);
23494994d37SDimitry Andric   llvm::cantFail(
23594994d37SDimitry Andric       TypeDeserializer::deserializeAs<StringIdRecord>(dir_cvt, working_dir));
23694994d37SDimitry Andric   llvm::cantFail(
23794994d37SDimitry Andric       TypeDeserializer::deserializeAs<StringIdRecord>(file_cvt, file_name));
23894994d37SDimitry Andric 
239312c0ed1SDimitry Andric   llvm::sys::path::Style style = working_dir.String.starts_with("/")
24094994d37SDimitry Andric                                      ? llvm::sys::path::Style::posix
24194994d37SDimitry Andric                                      : llvm::sys::path::Style::windows;
24294994d37SDimitry Andric   if (llvm::sys::path::is_absolute(file_name.String, style))
24394994d37SDimitry Andric     return file_name.String;
24494994d37SDimitry Andric 
24594994d37SDimitry Andric   llvm::SmallString<64> absolute_path = working_dir.String;
24694994d37SDimitry Andric   llvm::sys::path::append(absolute_path, file_name.String);
24794994d37SDimitry Andric   return absolute_path;
24894994d37SDimitry Andric }
249