xref: /src/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1148779dfSDimitry Andric //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2148779dfSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6148779dfSDimitry Andric //
7148779dfSDimitry Andric //===----------------------------------------------------------------------===//
8148779dfSDimitry Andric 
9148779dfSDimitry Andric #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10148779dfSDimitry Andric 
11148779dfSDimitry Andric #include "llvm/ADT/ArrayRef.h"
12148779dfSDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h"
13148779dfSDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14148779dfSDimitry Andric #include "llvm/Support/BinaryStreamWriter.h"
15148779dfSDimitry Andric #include "llvm/Support/Endian.h"
16b1c73532SDimitry Andric #include "llvm/Support/TimeProfiler.h"
17148779dfSDimitry Andric 
18eb11fae6SDimitry Andric #include <map>
19eb11fae6SDimitry Andric 
20148779dfSDimitry Andric using namespace llvm;
21148779dfSDimitry Andric using namespace llvm::msf;
22148779dfSDimitry Andric using namespace llvm::support;
23148779dfSDimitry Andric using namespace llvm::support::endian;
24148779dfSDimitry Andric using namespace llvm::pdb;
25148779dfSDimitry Andric 
StringTableHashTraits(PDBStringTableBuilder & Table)26eb11fae6SDimitry Andric StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
27eb11fae6SDimitry Andric     : Table(&Table) {}
28eb11fae6SDimitry Andric 
hashLookupKey(StringRef S) const29eb11fae6SDimitry Andric uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
30e6d15924SDimitry Andric   // The reference implementation doesn't include code for /src/headerblock
31e6d15924SDimitry Andric   // handling, but it can only read natvis entries lld's PDB files if
32e6d15924SDimitry Andric   // this hash function truncates the hash to 16 bit.
33e6d15924SDimitry Andric   // PDB/include/misc.h in the reference implementation has a hashSz() function
34e6d15924SDimitry Andric   // that returns an unsigned short, that seems what's being used for
35e6d15924SDimitry Andric   // /src/headerblock.
36e6d15924SDimitry Andric   return static_cast<uint16_t>(Table->getIdForString(S));
37eb11fae6SDimitry Andric }
38eb11fae6SDimitry Andric 
storageKeyToLookupKey(uint32_t Offset) const39eb11fae6SDimitry Andric StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
40eb11fae6SDimitry Andric   return Table->getStringForId(Offset);
41eb11fae6SDimitry Andric }
42eb11fae6SDimitry Andric 
lookupKeyToStorageKey(StringRef S)43eb11fae6SDimitry Andric uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
44eb11fae6SDimitry Andric   return Table->insert(S);
45eb11fae6SDimitry Andric }
46eb11fae6SDimitry Andric 
insert(StringRef S)47148779dfSDimitry Andric uint32_t PDBStringTableBuilder::insert(StringRef S) {
48148779dfSDimitry Andric   return Strings.insert(S);
49148779dfSDimitry Andric }
50148779dfSDimitry Andric 
getIdForString(StringRef S) const51eb11fae6SDimitry Andric uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
52eb11fae6SDimitry Andric   return Strings.getIdForString(S);
53eb11fae6SDimitry Andric }
54eb11fae6SDimitry Andric 
getStringForId(uint32_t Id) const55eb11fae6SDimitry Andric StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
56eb11fae6SDimitry Andric   return Strings.getStringForId(Id);
57eb11fae6SDimitry Andric }
58eb11fae6SDimitry Andric 
computeBucketCount(uint32_t NumStrings)59e6d15924SDimitry Andric static uint32_t computeBucketCount(uint32_t NumStrings) {
60eb11fae6SDimitry Andric   // This is a precomputed list of Buckets given the specified number of
61eb11fae6SDimitry Andric   // strings.  Matching the reference algorithm exactly is not strictly
62eb11fae6SDimitry Andric   // necessary for correctness, but it helps when comparing LLD's PDBs with
63eb11fae6SDimitry Andric   // Microsoft's PDBs so as to eliminate superfluous differences.
64e6d15924SDimitry Andric   // The reference implementation does (in nmt.h, NMT::grow()):
65e6d15924SDimitry Andric   //   unsigned StringCount = 0;
66e6d15924SDimitry Andric   //   unsigned BucketCount = 1;
67e6d15924SDimitry Andric   //   fn insert() {
68e6d15924SDimitry Andric   //     ++StringCount;
69e6d15924SDimitry Andric   //     if (BucketCount * 3 / 4 < StringCount)
70e6d15924SDimitry Andric   //       BucketCount = BucketCount * 3 / 2 + 1;
71e6d15924SDimitry Andric   //   }
72e6d15924SDimitry Andric   // This list contains all StringCount, BucketCount pairs where BucketCount was
73e6d15924SDimitry Andric   // just incremented.  It ends before the first BucketCount entry where
74e6d15924SDimitry Andric   // BucketCount * 3 would overflow a 32-bit unsigned int.
75145449b1SDimitry Andric   static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
76e6d15924SDimitry Andric       {0, 1},
77eb11fae6SDimitry Andric       {1, 2},
78eb11fae6SDimitry Andric       {2, 4},
79eb11fae6SDimitry Andric       {4, 7},
80eb11fae6SDimitry Andric       {6, 11},
81eb11fae6SDimitry Andric       {9, 17},
82eb11fae6SDimitry Andric       {13, 26},
83eb11fae6SDimitry Andric       {20, 40},
84eb11fae6SDimitry Andric       {31, 61},
85eb11fae6SDimitry Andric       {46, 92},
86eb11fae6SDimitry Andric       {70, 139},
87eb11fae6SDimitry Andric       {105, 209},
88eb11fae6SDimitry Andric       {157, 314},
89eb11fae6SDimitry Andric       {236, 472},
90eb11fae6SDimitry Andric       {355, 709},
91eb11fae6SDimitry Andric       {532, 1064},
92eb11fae6SDimitry Andric       {799, 1597},
93eb11fae6SDimitry Andric       {1198, 2396},
94eb11fae6SDimitry Andric       {1798, 3595},
95eb11fae6SDimitry Andric       {2697, 5393},
96eb11fae6SDimitry Andric       {4045, 8090},
97eb11fae6SDimitry Andric       {6068, 12136},
98eb11fae6SDimitry Andric       {9103, 18205},
99eb11fae6SDimitry Andric       {13654, 27308},
100eb11fae6SDimitry Andric       {20482, 40963},
101eb11fae6SDimitry Andric       {30723, 61445},
102eb11fae6SDimitry Andric       {46084, 92168},
103eb11fae6SDimitry Andric       {69127, 138253},
104eb11fae6SDimitry Andric       {103690, 207380},
105eb11fae6SDimitry Andric       {155536, 311071},
106eb11fae6SDimitry Andric       {233304, 466607},
107eb11fae6SDimitry Andric       {349956, 699911},
108eb11fae6SDimitry Andric       {524934, 1049867},
109eb11fae6SDimitry Andric       {787401, 1574801},
110eb11fae6SDimitry Andric       {1181101, 2362202},
111eb11fae6SDimitry Andric       {1771652, 3543304},
112eb11fae6SDimitry Andric       {2657479, 5314957},
113eb11fae6SDimitry Andric       {3986218, 7972436},
114eb11fae6SDimitry Andric       {5979328, 11958655},
115eb11fae6SDimitry Andric       {8968992, 17937983},
116eb11fae6SDimitry Andric       {13453488, 26906975},
117eb11fae6SDimitry Andric       {20180232, 40360463},
118eb11fae6SDimitry Andric       {30270348, 60540695},
119eb11fae6SDimitry Andric       {45405522, 90811043},
120eb11fae6SDimitry Andric       {68108283, 136216565},
121eb11fae6SDimitry Andric       {102162424, 204324848},
122eb11fae6SDimitry Andric       {153243637, 306487273},
123eb11fae6SDimitry Andric       {229865455, 459730910},
124eb11fae6SDimitry Andric       {344798183, 689596366},
125eb11fae6SDimitry Andric       {517197275, 1034394550},
126e6d15924SDimitry Andric       {775795913, 1551591826},
127e6d15924SDimitry Andric       {1163693870, 2327387740}};
128145449b1SDimitry Andric   const auto *Entry = llvm::lower_bound(
129145449b1SDimitry Andric       StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
130145449b1SDimitry Andric   assert(Entry != std::end(StringsToBuckets));
131eb11fae6SDimitry Andric   return Entry->second;
132148779dfSDimitry Andric }
133148779dfSDimitry Andric 
calculateHashTableSize() const134148779dfSDimitry Andric uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
135148779dfSDimitry Andric   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
136148779dfSDimitry Andric   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
137148779dfSDimitry Andric 
138148779dfSDimitry Andric   return Size;
139148779dfSDimitry Andric }
140148779dfSDimitry Andric 
calculateSerializedSize() const141148779dfSDimitry Andric uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
142148779dfSDimitry Andric   uint32_t Size = 0;
143148779dfSDimitry Andric   Size += sizeof(PDBStringTableHeader);
144148779dfSDimitry Andric   Size += Strings.calculateSerializedSize();
145148779dfSDimitry Andric   Size += calculateHashTableSize();
146148779dfSDimitry Andric   Size += sizeof(uint32_t); // The /names stream ends with the string count.
147148779dfSDimitry Andric   return Size;
148148779dfSDimitry Andric }
149148779dfSDimitry Andric 
setStrings(const codeview::DebugStringTableSubsection & Strings)1507c7aba6eSDimitry Andric void PDBStringTableBuilder::setStrings(
1517c7aba6eSDimitry Andric     const codeview::DebugStringTableSubsection &Strings) {
1527c7aba6eSDimitry Andric   this->Strings = Strings;
1537c7aba6eSDimitry Andric }
1547c7aba6eSDimitry Andric 
writeHeader(BinaryStreamWriter & Writer) const155148779dfSDimitry Andric Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
156148779dfSDimitry Andric   // Write a header
157148779dfSDimitry Andric   PDBStringTableHeader H;
158148779dfSDimitry Andric   H.Signature = PDBStringTableSignature;
159148779dfSDimitry Andric   H.HashVersion = 1;
160148779dfSDimitry Andric   H.ByteSize = Strings.calculateSerializedSize();
161148779dfSDimitry Andric   if (auto EC = Writer.writeObject(H))
162148779dfSDimitry Andric     return EC;
163148779dfSDimitry Andric   assert(Writer.bytesRemaining() == 0);
164148779dfSDimitry Andric   return Error::success();
165148779dfSDimitry Andric }
166148779dfSDimitry Andric 
writeStrings(BinaryStreamWriter & Writer) const167148779dfSDimitry Andric Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
168148779dfSDimitry Andric   if (auto EC = Strings.commit(Writer))
169148779dfSDimitry Andric     return EC;
170148779dfSDimitry Andric 
171148779dfSDimitry Andric   assert(Writer.bytesRemaining() == 0);
172148779dfSDimitry Andric   return Error::success();
173148779dfSDimitry Andric }
174148779dfSDimitry Andric 
writeHashTable(BinaryStreamWriter & Writer) const175148779dfSDimitry Andric Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
176148779dfSDimitry Andric   // Write a hash table.
177148779dfSDimitry Andric   uint32_t BucketCount = computeBucketCount(Strings.size());
178148779dfSDimitry Andric   if (auto EC = Writer.writeInteger(BucketCount))
179148779dfSDimitry Andric     return EC;
180148779dfSDimitry Andric   std::vector<ulittle32_t> Buckets(BucketCount);
181148779dfSDimitry Andric 
182e3b55780SDimitry Andric   for (const auto &Pair : Strings) {
183148779dfSDimitry Andric     StringRef S = Pair.getKey();
184148779dfSDimitry Andric     uint32_t Offset = Pair.getValue();
185148779dfSDimitry Andric     uint32_t Hash = hashStringV1(S);
186148779dfSDimitry Andric 
187148779dfSDimitry Andric     for (uint32_t I = 0; I != BucketCount; ++I) {
188148779dfSDimitry Andric       uint32_t Slot = (Hash + I) % BucketCount;
189148779dfSDimitry Andric       if (Buckets[Slot] != 0)
190148779dfSDimitry Andric         continue;
191148779dfSDimitry Andric       Buckets[Slot] = Offset;
192148779dfSDimitry Andric       break;
193148779dfSDimitry Andric     }
194148779dfSDimitry Andric   }
195148779dfSDimitry Andric 
196148779dfSDimitry Andric   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
197148779dfSDimitry Andric     return EC;
198148779dfSDimitry Andric 
199148779dfSDimitry Andric   assert(Writer.bytesRemaining() == 0);
200148779dfSDimitry Andric   return Error::success();
201148779dfSDimitry Andric }
202148779dfSDimitry Andric 
writeEpilogue(BinaryStreamWriter & Writer) const203148779dfSDimitry Andric Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
204148779dfSDimitry Andric   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
205148779dfSDimitry Andric     return EC;
206148779dfSDimitry Andric   assert(Writer.bytesRemaining() == 0);
207148779dfSDimitry Andric   return Error::success();
208148779dfSDimitry Andric }
209148779dfSDimitry Andric 
commit(BinaryStreamWriter & Writer) const210148779dfSDimitry Andric Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
211b1c73532SDimitry Andric   llvm::TimeTraceScope timeScope("Commit strings table");
212148779dfSDimitry Andric   BinaryStreamWriter SectionWriter;
213148779dfSDimitry Andric 
214148779dfSDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
215148779dfSDimitry Andric   if (auto EC = writeHeader(SectionWriter))
216148779dfSDimitry Andric     return EC;
217148779dfSDimitry Andric 
218148779dfSDimitry Andric   std::tie(SectionWriter, Writer) =
219148779dfSDimitry Andric       Writer.split(Strings.calculateSerializedSize());
220148779dfSDimitry Andric   if (auto EC = writeStrings(SectionWriter))
221148779dfSDimitry Andric     return EC;
222148779dfSDimitry Andric 
223148779dfSDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
224148779dfSDimitry Andric   if (auto EC = writeHashTable(SectionWriter))
225148779dfSDimitry Andric     return EC;
226148779dfSDimitry Andric 
227148779dfSDimitry Andric   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
228148779dfSDimitry Andric   if (auto EC = writeEpilogue(SectionWriter))
229148779dfSDimitry Andric     return EC;
230148779dfSDimitry Andric 
231148779dfSDimitry Andric   return Error::success();
232148779dfSDimitry Andric }
233