1344a3780SDimitry Andric //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2344a3780SDimitry Andric //
3344a3780SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4344a3780SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5344a3780SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6344a3780SDimitry Andric //
7344a3780SDimitry Andric //===----------------------------------------------------------------------===//
8344a3780SDimitry Andric //
9344a3780SDimitry Andric // This file defines a demangler for Rust v0 mangled symbols as specified in
10344a3780SDimitry Andric // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11344a3780SDimitry Andric //
12344a3780SDimitry Andric //===----------------------------------------------------------------------===//
13344a3780SDimitry Andric
14344a3780SDimitry Andric #include "llvm/Demangle/Demangle.h"
157fa27ce4SDimitry Andric #include "llvm/Demangle/StringViewExtras.h"
16344a3780SDimitry Andric #include "llvm/Demangle/Utility.h"
17344a3780SDimitry Andric
18344a3780SDimitry Andric #include <algorithm>
19344a3780SDimitry Andric #include <cassert>
20344a3780SDimitry Andric #include <cstdint>
21344a3780SDimitry Andric #include <cstring>
22344a3780SDimitry Andric #include <limits>
237fa27ce4SDimitry Andric #include <string_view>
24344a3780SDimitry Andric
25344a3780SDimitry Andric using namespace llvm;
26344a3780SDimitry Andric
27c0981da4SDimitry Andric using llvm::itanium_demangle::OutputBuffer;
28145449b1SDimitry Andric using llvm::itanium_demangle::ScopedOverride;
297fa27ce4SDimitry Andric using llvm::itanium_demangle::starts_with;
30344a3780SDimitry Andric
31344a3780SDimitry Andric namespace {
32344a3780SDimitry Andric
// An identifier as it was read from the mangled input.
struct Identifier {
  // Bytes of the identifier; a non-owning view.
  std::string_view Name;
  // Presumably set when Name is Punycode-encoded and has to be decoded before
  // it is shown (demangleFnSig rejects ABI names that have it set).
  bool Punycode;

  // True when the identifier consists of zero bytes.
  bool empty() const { return Name.size() == 0; }
};
39344a3780SDimitry Andric
// Built-in types of the v0 mangling that are encoded by a single lower-case
// letter. The spelling used in the output is chosen by printBasicType.
enum class BasicType {
  // bool and char.
  Bool,
  Char,
  // Signed integers, narrowest first, then the pointer-sized one.
  I8,
  I16,
  I32,
  I64,
  I128,
  ISize,
  // Unsigned integers, in the same order.
  U8,
  U16,
  U32,
  U64,
  U128,
  USize,
  // Floating point.
  F32,
  F64,
  // str.
  Str,
  // Placeholder (e.g. for generic params), shown as `_`.
  Placeholder,
  // `()`.
  Unit,
  // `...`.
  Variadic,
  // `!`.
  Never,
};
63344a3780SDimitry Andric
// Tells demanglePath whether the path being demangled is inside a type.
enum class IsInType { No, Yes };
68344a3780SDimitry Andric
// Tells demanglePath whether the closing `>` of a generic argument list should
// be left out of the output.
enum class LeaveGenericsOpen { No, Yes };
73344a3780SDimitry Andric
74344a3780SDimitry Andric class Demangler {
75344a3780SDimitry Andric // Maximum recursion level. Used to avoid stack overflow.
76344a3780SDimitry Andric size_t MaxRecursionLevel;
77344a3780SDimitry Andric // Current recursion level.
78344a3780SDimitry Andric size_t RecursionLevel;
79344a3780SDimitry Andric size_t BoundLifetimes;
80344a3780SDimitry Andric // Input string that is being demangled with "_R" prefix removed.
817fa27ce4SDimitry Andric std::string_view Input;
82344a3780SDimitry Andric // Position in the input string.
83344a3780SDimitry Andric size_t Position;
84344a3780SDimitry Andric // When true, print methods append the output to the stream.
85344a3780SDimitry Andric // When false, the output is suppressed.
86344a3780SDimitry Andric bool Print;
87344a3780SDimitry Andric // True if an error occurred.
88344a3780SDimitry Andric bool Error;
89344a3780SDimitry Andric
90344a3780SDimitry Andric public:
91344a3780SDimitry Andric // Demangled output.
92c0981da4SDimitry Andric OutputBuffer Output;
93344a3780SDimitry Andric
94344a3780SDimitry Andric Demangler(size_t MaxRecursionLevel = 500);
95344a3780SDimitry Andric
967fa27ce4SDimitry Andric bool demangle(std::string_view MangledName);
97344a3780SDimitry Andric
98344a3780SDimitry Andric private:
99344a3780SDimitry Andric bool demanglePath(IsInType Type,
100344a3780SDimitry Andric LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101344a3780SDimitry Andric void demangleImplPath(IsInType InType);
102344a3780SDimitry Andric void demangleGenericArg();
103344a3780SDimitry Andric void demangleType();
104344a3780SDimitry Andric void demangleFnSig();
105344a3780SDimitry Andric void demangleDynBounds();
106344a3780SDimitry Andric void demangleDynTrait();
107344a3780SDimitry Andric void demangleOptionalBinder();
108344a3780SDimitry Andric void demangleConst();
109344a3780SDimitry Andric void demangleConstInt();
110344a3780SDimitry Andric void demangleConstBool();
111344a3780SDimitry Andric void demangleConstChar();
112344a3780SDimitry Andric
demangleBackref(Callable Demangler)113344a3780SDimitry Andric template <typename Callable> void demangleBackref(Callable Demangler) {
114344a3780SDimitry Andric uint64_t Backref = parseBase62Number();
115344a3780SDimitry Andric if (Error || Backref >= Position) {
116344a3780SDimitry Andric Error = true;
117344a3780SDimitry Andric return;
118344a3780SDimitry Andric }
119344a3780SDimitry Andric
120344a3780SDimitry Andric if (!Print)
121344a3780SDimitry Andric return;
122344a3780SDimitry Andric
123145449b1SDimitry Andric ScopedOverride<size_t> SavePosition(Position, Position);
124344a3780SDimitry Andric Position = Backref;
125344a3780SDimitry Andric Demangler();
126344a3780SDimitry Andric }
127344a3780SDimitry Andric
128344a3780SDimitry Andric Identifier parseIdentifier();
129344a3780SDimitry Andric uint64_t parseOptionalBase62Number(char Tag);
130344a3780SDimitry Andric uint64_t parseBase62Number();
131344a3780SDimitry Andric uint64_t parseDecimalNumber();
1327fa27ce4SDimitry Andric uint64_t parseHexNumber(std::string_view &HexDigits);
133344a3780SDimitry Andric
134344a3780SDimitry Andric void print(char C);
1357fa27ce4SDimitry Andric void print(std::string_view S);
136344a3780SDimitry Andric void printDecimalNumber(uint64_t N);
137344a3780SDimitry Andric void printBasicType(BasicType);
138344a3780SDimitry Andric void printLifetime(uint64_t Index);
139c0981da4SDimitry Andric void printIdentifier(Identifier Ident);
140344a3780SDimitry Andric
141344a3780SDimitry Andric char look() const;
142344a3780SDimitry Andric char consume();
143344a3780SDimitry Andric bool consumeIf(char Prefix);
144344a3780SDimitry Andric
145344a3780SDimitry Andric bool addAssign(uint64_t &A, uint64_t B);
146344a3780SDimitry Andric bool mulAssign(uint64_t &A, uint64_t B);
147344a3780SDimitry Andric };
148344a3780SDimitry Andric
149344a3780SDimitry Andric } // namespace
150344a3780SDimitry Andric
rustDemangle(std::string_view MangledName)1517fa27ce4SDimitry Andric char *llvm::rustDemangle(std::string_view MangledName) {
152344a3780SDimitry Andric // Return early if mangled name doesn't look like a Rust symbol.
1537fa27ce4SDimitry Andric if (MangledName.empty() || !starts_with(MangledName, "_R"))
154344a3780SDimitry Andric return nullptr;
155344a3780SDimitry Andric
156344a3780SDimitry Andric Demangler D;
1577fa27ce4SDimitry Andric if (!D.demangle(MangledName)) {
158344a3780SDimitry Andric std::free(D.Output.getBuffer());
159344a3780SDimitry Andric return nullptr;
160344a3780SDimitry Andric }
161344a3780SDimitry Andric
162344a3780SDimitry Andric D.Output += '\0';
163344a3780SDimitry Andric
164145449b1SDimitry Andric return D.Output.getBuffer();
165344a3780SDimitry Andric }
166344a3780SDimitry Andric
Demangler(size_t MaxRecursionLevel)167344a3780SDimitry Andric Demangler::Demangler(size_t MaxRecursionLevel)
168344a3780SDimitry Andric : MaxRecursionLevel(MaxRecursionLevel) {}
169344a3780SDimitry Andric
// Returns true if C is an ASCII decimal digit.
static inline bool isDigit(const char C) { return !(C < '0' || C > '9'); }
171344a3780SDimitry Andric
// Returns true if C is a hexadecimal digit. Only the lower-case letters a-f
// are accepted; upper-case A-F are not.
static inline bool isHexDigit(const char C) {
  if (C >= '0' && C <= '9')
    return true;
  return C >= 'a' && C <= 'f';
}
175344a3780SDimitry Andric
// Returns true if C is an ASCII lower-case letter.
static inline bool isLower(const char C) { return C >= 'a' && C <= 'z'; }
177344a3780SDimitry Andric
// Returns true if C is an ASCII upper-case letter.
static inline bool isUpper(const char C) { return !(C < 'A' || C > 'Z'); }
179344a3780SDimitry Andric
180344a3780SDimitry Andric /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)181344a3780SDimitry Andric static inline bool isValid(const char C) {
182344a3780SDimitry Andric return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
183344a3780SDimitry Andric }
184344a3780SDimitry Andric
185344a3780SDimitry Andric // Demangles Rust v0 mangled symbol. Returns true when successful, and false
186344a3780SDimitry Andric // otherwise. The demangled symbol is stored in Output field. It is
187344a3780SDimitry Andric // responsibility of the caller to free the memory behind the output stream.
188344a3780SDimitry Andric //
189344a3780SDimitry Andric // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(std::string_view Mangled)1907fa27ce4SDimitry Andric bool Demangler::demangle(std::string_view Mangled) {
191344a3780SDimitry Andric Position = 0;
192344a3780SDimitry Andric Error = false;
193344a3780SDimitry Andric Print = true;
194344a3780SDimitry Andric RecursionLevel = 0;
195344a3780SDimitry Andric BoundLifetimes = 0;
196344a3780SDimitry Andric
1977fa27ce4SDimitry Andric if (!starts_with(Mangled, "_R")) {
198344a3780SDimitry Andric Error = true;
199344a3780SDimitry Andric return false;
200344a3780SDimitry Andric }
2017fa27ce4SDimitry Andric Mangled.remove_prefix(2);
202344a3780SDimitry Andric size_t Dot = Mangled.find('.');
2037fa27ce4SDimitry Andric Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
204344a3780SDimitry Andric
205344a3780SDimitry Andric demanglePath(IsInType::No);
206344a3780SDimitry Andric
207344a3780SDimitry Andric if (Position != Input.size()) {
208145449b1SDimitry Andric ScopedOverride<bool> SavePrint(Print, false);
209344a3780SDimitry Andric demanglePath(IsInType::No);
210344a3780SDimitry Andric }
211344a3780SDimitry Andric
212344a3780SDimitry Andric if (Position != Input.size())
213344a3780SDimitry Andric Error = true;
214344a3780SDimitry Andric
2157fa27ce4SDimitry Andric if (Dot != std::string_view::npos) {
216344a3780SDimitry Andric print(" (");
2177fa27ce4SDimitry Andric print(Mangled.substr(Dot));
218344a3780SDimitry Andric print(")");
219344a3780SDimitry Andric }
220344a3780SDimitry Andric
221344a3780SDimitry Andric return !Error;
222344a3780SDimitry Andric }
223344a3780SDimitry Andric
224344a3780SDimitry Andric // Demangles a path. InType indicates whether a path is inside a type. When
225344a3780SDimitry Andric // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226344a3780SDimitry Andric // output. Return value indicates whether generics arguments have been left
227344a3780SDimitry Andric // open.
228344a3780SDimitry Andric //
229344a3780SDimitry Andric // <path> = "C" <identifier> // crate root
230344a3780SDimitry Andric // | "M" <impl-path> <type> // <T> (inherent impl)
231344a3780SDimitry Andric // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
232344a3780SDimitry Andric // | "Y" <type> <path> // <T as Trait> (trait definition)
233344a3780SDimitry Andric // | "N" <ns> <path> <identifier> // ...::ident (nested path)
234344a3780SDimitry Andric // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235344a3780SDimitry Andric // | <backref>
236344a3780SDimitry Andric // <identifier> = [<disambiguator>] <undisambiguated-identifier>
237344a3780SDimitry Andric // <ns> = "C" // closure
238344a3780SDimitry Andric // | "S" // shim
239344a3780SDimitry Andric // | <A-Z> // other special namespaces
240344a3780SDimitry Andric // | <a-z> // internal namespaces
demanglePath(IsInType InType,LeaveGenericsOpen LeaveOpen)241344a3780SDimitry Andric bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242344a3780SDimitry Andric if (Error || RecursionLevel >= MaxRecursionLevel) {
243344a3780SDimitry Andric Error = true;
244344a3780SDimitry Andric return false;
245344a3780SDimitry Andric }
246145449b1SDimitry Andric ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247344a3780SDimitry Andric
248344a3780SDimitry Andric switch (consume()) {
249344a3780SDimitry Andric case 'C': {
250344a3780SDimitry Andric parseOptionalBase62Number('s');
251c0981da4SDimitry Andric printIdentifier(parseIdentifier());
252344a3780SDimitry Andric break;
253344a3780SDimitry Andric }
254344a3780SDimitry Andric case 'M': {
255344a3780SDimitry Andric demangleImplPath(InType);
256344a3780SDimitry Andric print("<");
257344a3780SDimitry Andric demangleType();
258344a3780SDimitry Andric print(">");
259344a3780SDimitry Andric break;
260344a3780SDimitry Andric }
261344a3780SDimitry Andric case 'X': {
262344a3780SDimitry Andric demangleImplPath(InType);
263344a3780SDimitry Andric print("<");
264344a3780SDimitry Andric demangleType();
265344a3780SDimitry Andric print(" as ");
266344a3780SDimitry Andric demanglePath(IsInType::Yes);
267344a3780SDimitry Andric print(">");
268344a3780SDimitry Andric break;
269344a3780SDimitry Andric }
270344a3780SDimitry Andric case 'Y': {
271344a3780SDimitry Andric print("<");
272344a3780SDimitry Andric demangleType();
273344a3780SDimitry Andric print(" as ");
274344a3780SDimitry Andric demanglePath(IsInType::Yes);
275344a3780SDimitry Andric print(">");
276344a3780SDimitry Andric break;
277344a3780SDimitry Andric }
278344a3780SDimitry Andric case 'N': {
279344a3780SDimitry Andric char NS = consume();
280344a3780SDimitry Andric if (!isLower(NS) && !isUpper(NS)) {
281344a3780SDimitry Andric Error = true;
282344a3780SDimitry Andric break;
283344a3780SDimitry Andric }
284344a3780SDimitry Andric demanglePath(InType);
285344a3780SDimitry Andric
286344a3780SDimitry Andric uint64_t Disambiguator = parseOptionalBase62Number('s');
287344a3780SDimitry Andric Identifier Ident = parseIdentifier();
288344a3780SDimitry Andric
289344a3780SDimitry Andric if (isUpper(NS)) {
290344a3780SDimitry Andric // Special namespaces
291344a3780SDimitry Andric print("::{");
292344a3780SDimitry Andric if (NS == 'C')
293344a3780SDimitry Andric print("closure");
294344a3780SDimitry Andric else if (NS == 'S')
295344a3780SDimitry Andric print("shim");
296344a3780SDimitry Andric else
297344a3780SDimitry Andric print(NS);
298344a3780SDimitry Andric if (!Ident.empty()) {
299344a3780SDimitry Andric print(":");
300c0981da4SDimitry Andric printIdentifier(Ident);
301344a3780SDimitry Andric }
302344a3780SDimitry Andric print('#');
303344a3780SDimitry Andric printDecimalNumber(Disambiguator);
304344a3780SDimitry Andric print('}');
305344a3780SDimitry Andric } else {
306344a3780SDimitry Andric // Implementation internal namespaces.
307344a3780SDimitry Andric if (!Ident.empty()) {
308344a3780SDimitry Andric print("::");
309c0981da4SDimitry Andric printIdentifier(Ident);
310344a3780SDimitry Andric }
311344a3780SDimitry Andric }
312344a3780SDimitry Andric break;
313344a3780SDimitry Andric }
314344a3780SDimitry Andric case 'I': {
315344a3780SDimitry Andric demanglePath(InType);
316344a3780SDimitry Andric // Omit "::" when in a type, where it is optional.
317344a3780SDimitry Andric if (InType == IsInType::No)
318344a3780SDimitry Andric print("::");
319344a3780SDimitry Andric print("<");
320344a3780SDimitry Andric for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
321344a3780SDimitry Andric if (I > 0)
322344a3780SDimitry Andric print(", ");
323344a3780SDimitry Andric demangleGenericArg();
324344a3780SDimitry Andric }
325344a3780SDimitry Andric if (LeaveOpen == LeaveGenericsOpen::Yes)
326344a3780SDimitry Andric return true;
327344a3780SDimitry Andric else
328344a3780SDimitry Andric print(">");
329344a3780SDimitry Andric break;
330344a3780SDimitry Andric }
331344a3780SDimitry Andric case 'B': {
332344a3780SDimitry Andric bool IsOpen = false;
333344a3780SDimitry Andric demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334344a3780SDimitry Andric return IsOpen;
335344a3780SDimitry Andric }
336344a3780SDimitry Andric default:
337344a3780SDimitry Andric Error = true;
338344a3780SDimitry Andric break;
339344a3780SDimitry Andric }
340344a3780SDimitry Andric
341344a3780SDimitry Andric return false;
342344a3780SDimitry Andric }
343344a3780SDimitry Andric
344344a3780SDimitry Andric // <impl-path> = [<disambiguator>] <path>
345344a3780SDimitry Andric // <disambiguator> = "s" <base-62-number>
demangleImplPath(IsInType InType)346344a3780SDimitry Andric void Demangler::demangleImplPath(IsInType InType) {
347145449b1SDimitry Andric ScopedOverride<bool> SavePrint(Print, false);
348344a3780SDimitry Andric parseOptionalBase62Number('s');
349344a3780SDimitry Andric demanglePath(InType);
350344a3780SDimitry Andric }
351344a3780SDimitry Andric
352344a3780SDimitry Andric // <generic-arg> = <lifetime>
353344a3780SDimitry Andric // | <type>
354344a3780SDimitry Andric // | "K" <const>
355344a3780SDimitry Andric // <lifetime> = "L" <base-62-number>
demangleGenericArg()356344a3780SDimitry Andric void Demangler::demangleGenericArg() {
357344a3780SDimitry Andric if (consumeIf('L'))
358344a3780SDimitry Andric printLifetime(parseBase62Number());
359344a3780SDimitry Andric else if (consumeIf('K'))
360344a3780SDimitry Andric demangleConst();
361344a3780SDimitry Andric else
362344a3780SDimitry Andric demangleType();
363344a3780SDimitry Andric }
364344a3780SDimitry Andric
365344a3780SDimitry Andric // <basic-type> = "a" // i8
366344a3780SDimitry Andric // | "b" // bool
367344a3780SDimitry Andric // | "c" // char
368344a3780SDimitry Andric // | "d" // f64
369344a3780SDimitry Andric // | "e" // str
370344a3780SDimitry Andric // | "f" // f32
371344a3780SDimitry Andric // | "h" // u8
372344a3780SDimitry Andric // | "i" // isize
373344a3780SDimitry Andric // | "j" // usize
374344a3780SDimitry Andric // | "l" // i32
375344a3780SDimitry Andric // | "m" // u32
376344a3780SDimitry Andric // | "n" // i128
377344a3780SDimitry Andric // | "o" // u128
378344a3780SDimitry Andric // | "s" // i16
379344a3780SDimitry Andric // | "t" // u16
380344a3780SDimitry Andric // | "u" // ()
381344a3780SDimitry Andric // | "v" // ...
382344a3780SDimitry Andric // | "x" // i64
383344a3780SDimitry Andric // | "y" // u64
384344a3780SDimitry Andric // | "z" // !
385344a3780SDimitry Andric // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)386344a3780SDimitry Andric static bool parseBasicType(char C, BasicType &Type) {
387344a3780SDimitry Andric switch (C) {
388344a3780SDimitry Andric case 'a':
389344a3780SDimitry Andric Type = BasicType::I8;
390344a3780SDimitry Andric return true;
391344a3780SDimitry Andric case 'b':
392344a3780SDimitry Andric Type = BasicType::Bool;
393344a3780SDimitry Andric return true;
394344a3780SDimitry Andric case 'c':
395344a3780SDimitry Andric Type = BasicType::Char;
396344a3780SDimitry Andric return true;
397344a3780SDimitry Andric case 'd':
398344a3780SDimitry Andric Type = BasicType::F64;
399344a3780SDimitry Andric return true;
400344a3780SDimitry Andric case 'e':
401344a3780SDimitry Andric Type = BasicType::Str;
402344a3780SDimitry Andric return true;
403344a3780SDimitry Andric case 'f':
404344a3780SDimitry Andric Type = BasicType::F32;
405344a3780SDimitry Andric return true;
406344a3780SDimitry Andric case 'h':
407344a3780SDimitry Andric Type = BasicType::U8;
408344a3780SDimitry Andric return true;
409344a3780SDimitry Andric case 'i':
410344a3780SDimitry Andric Type = BasicType::ISize;
411344a3780SDimitry Andric return true;
412344a3780SDimitry Andric case 'j':
413344a3780SDimitry Andric Type = BasicType::USize;
414344a3780SDimitry Andric return true;
415344a3780SDimitry Andric case 'l':
416344a3780SDimitry Andric Type = BasicType::I32;
417344a3780SDimitry Andric return true;
418344a3780SDimitry Andric case 'm':
419344a3780SDimitry Andric Type = BasicType::U32;
420344a3780SDimitry Andric return true;
421344a3780SDimitry Andric case 'n':
422344a3780SDimitry Andric Type = BasicType::I128;
423344a3780SDimitry Andric return true;
424344a3780SDimitry Andric case 'o':
425344a3780SDimitry Andric Type = BasicType::U128;
426344a3780SDimitry Andric return true;
427344a3780SDimitry Andric case 'p':
428344a3780SDimitry Andric Type = BasicType::Placeholder;
429344a3780SDimitry Andric return true;
430344a3780SDimitry Andric case 's':
431344a3780SDimitry Andric Type = BasicType::I16;
432344a3780SDimitry Andric return true;
433344a3780SDimitry Andric case 't':
434344a3780SDimitry Andric Type = BasicType::U16;
435344a3780SDimitry Andric return true;
436344a3780SDimitry Andric case 'u':
437344a3780SDimitry Andric Type = BasicType::Unit;
438344a3780SDimitry Andric return true;
439344a3780SDimitry Andric case 'v':
440344a3780SDimitry Andric Type = BasicType::Variadic;
441344a3780SDimitry Andric return true;
442344a3780SDimitry Andric case 'x':
443344a3780SDimitry Andric Type = BasicType::I64;
444344a3780SDimitry Andric return true;
445344a3780SDimitry Andric case 'y':
446344a3780SDimitry Andric Type = BasicType::U64;
447344a3780SDimitry Andric return true;
448344a3780SDimitry Andric case 'z':
449344a3780SDimitry Andric Type = BasicType::Never;
450344a3780SDimitry Andric return true;
451344a3780SDimitry Andric default:
452344a3780SDimitry Andric return false;
453344a3780SDimitry Andric }
454344a3780SDimitry Andric }
455344a3780SDimitry Andric
printBasicType(BasicType Type)456344a3780SDimitry Andric void Demangler::printBasicType(BasicType Type) {
457344a3780SDimitry Andric switch (Type) {
458344a3780SDimitry Andric case BasicType::Bool:
459344a3780SDimitry Andric print("bool");
460344a3780SDimitry Andric break;
461344a3780SDimitry Andric case BasicType::Char:
462344a3780SDimitry Andric print("char");
463344a3780SDimitry Andric break;
464344a3780SDimitry Andric case BasicType::I8:
465344a3780SDimitry Andric print("i8");
466344a3780SDimitry Andric break;
467344a3780SDimitry Andric case BasicType::I16:
468344a3780SDimitry Andric print("i16");
469344a3780SDimitry Andric break;
470344a3780SDimitry Andric case BasicType::I32:
471344a3780SDimitry Andric print("i32");
472344a3780SDimitry Andric break;
473344a3780SDimitry Andric case BasicType::I64:
474344a3780SDimitry Andric print("i64");
475344a3780SDimitry Andric break;
476344a3780SDimitry Andric case BasicType::I128:
477344a3780SDimitry Andric print("i128");
478344a3780SDimitry Andric break;
479344a3780SDimitry Andric case BasicType::ISize:
480344a3780SDimitry Andric print("isize");
481344a3780SDimitry Andric break;
482344a3780SDimitry Andric case BasicType::U8:
483344a3780SDimitry Andric print("u8");
484344a3780SDimitry Andric break;
485344a3780SDimitry Andric case BasicType::U16:
486344a3780SDimitry Andric print("u16");
487344a3780SDimitry Andric break;
488344a3780SDimitry Andric case BasicType::U32:
489344a3780SDimitry Andric print("u32");
490344a3780SDimitry Andric break;
491344a3780SDimitry Andric case BasicType::U64:
492344a3780SDimitry Andric print("u64");
493344a3780SDimitry Andric break;
494344a3780SDimitry Andric case BasicType::U128:
495344a3780SDimitry Andric print("u128");
496344a3780SDimitry Andric break;
497344a3780SDimitry Andric case BasicType::USize:
498344a3780SDimitry Andric print("usize");
499344a3780SDimitry Andric break;
500344a3780SDimitry Andric case BasicType::F32:
501344a3780SDimitry Andric print("f32");
502344a3780SDimitry Andric break;
503344a3780SDimitry Andric case BasicType::F64:
504344a3780SDimitry Andric print("f64");
505344a3780SDimitry Andric break;
506344a3780SDimitry Andric case BasicType::Str:
507344a3780SDimitry Andric print("str");
508344a3780SDimitry Andric break;
509344a3780SDimitry Andric case BasicType::Placeholder:
510344a3780SDimitry Andric print("_");
511344a3780SDimitry Andric break;
512344a3780SDimitry Andric case BasicType::Unit:
513344a3780SDimitry Andric print("()");
514344a3780SDimitry Andric break;
515344a3780SDimitry Andric case BasicType::Variadic:
516344a3780SDimitry Andric print("...");
517344a3780SDimitry Andric break;
518344a3780SDimitry Andric case BasicType::Never:
519344a3780SDimitry Andric print("!");
520344a3780SDimitry Andric break;
521344a3780SDimitry Andric }
522344a3780SDimitry Andric }
523344a3780SDimitry Andric
524344a3780SDimitry Andric // <type> = | <basic-type>
525344a3780SDimitry Andric // | <path> // named type
526344a3780SDimitry Andric // | "A" <type> <const> // [T; N]
527344a3780SDimitry Andric // | "S" <type> // [T]
528344a3780SDimitry Andric // | "T" {<type>} "E" // (T1, T2, T3, ...)
529344a3780SDimitry Andric // | "R" [<lifetime>] <type> // &T
530344a3780SDimitry Andric // | "Q" [<lifetime>] <type> // &mut T
531344a3780SDimitry Andric // | "P" <type> // *const T
532344a3780SDimitry Andric // | "O" <type> // *mut T
533344a3780SDimitry Andric // | "F" <fn-sig> // fn(...) -> ...
534344a3780SDimitry Andric // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535344a3780SDimitry Andric // | <backref> // backref
demangleType()536344a3780SDimitry Andric void Demangler::demangleType() {
537344a3780SDimitry Andric if (Error || RecursionLevel >= MaxRecursionLevel) {
538344a3780SDimitry Andric Error = true;
539344a3780SDimitry Andric return;
540344a3780SDimitry Andric }
541145449b1SDimitry Andric ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542344a3780SDimitry Andric
543344a3780SDimitry Andric size_t Start = Position;
544344a3780SDimitry Andric char C = consume();
545344a3780SDimitry Andric BasicType Type;
546344a3780SDimitry Andric if (parseBasicType(C, Type))
547344a3780SDimitry Andric return printBasicType(Type);
548344a3780SDimitry Andric
549344a3780SDimitry Andric switch (C) {
550344a3780SDimitry Andric case 'A':
551344a3780SDimitry Andric print("[");
552344a3780SDimitry Andric demangleType();
553344a3780SDimitry Andric print("; ");
554344a3780SDimitry Andric demangleConst();
555344a3780SDimitry Andric print("]");
556344a3780SDimitry Andric break;
557344a3780SDimitry Andric case 'S':
558344a3780SDimitry Andric print("[");
559344a3780SDimitry Andric demangleType();
560344a3780SDimitry Andric print("]");
561344a3780SDimitry Andric break;
562344a3780SDimitry Andric case 'T': {
563344a3780SDimitry Andric print("(");
564344a3780SDimitry Andric size_t I = 0;
565344a3780SDimitry Andric for (; !Error && !consumeIf('E'); ++I) {
566344a3780SDimitry Andric if (I > 0)
567344a3780SDimitry Andric print(", ");
568344a3780SDimitry Andric demangleType();
569344a3780SDimitry Andric }
570344a3780SDimitry Andric if (I == 1)
571344a3780SDimitry Andric print(",");
572344a3780SDimitry Andric print(")");
573344a3780SDimitry Andric break;
574344a3780SDimitry Andric }
575344a3780SDimitry Andric case 'R':
576344a3780SDimitry Andric case 'Q':
577344a3780SDimitry Andric print('&');
578344a3780SDimitry Andric if (consumeIf('L')) {
579344a3780SDimitry Andric if (auto Lifetime = parseBase62Number()) {
580344a3780SDimitry Andric printLifetime(Lifetime);
581344a3780SDimitry Andric print(' ');
582344a3780SDimitry Andric }
583344a3780SDimitry Andric }
584344a3780SDimitry Andric if (C == 'Q')
585344a3780SDimitry Andric print("mut ");
586344a3780SDimitry Andric demangleType();
587344a3780SDimitry Andric break;
588344a3780SDimitry Andric case 'P':
589344a3780SDimitry Andric print("*const ");
590344a3780SDimitry Andric demangleType();
591344a3780SDimitry Andric break;
592344a3780SDimitry Andric case 'O':
593344a3780SDimitry Andric print("*mut ");
594344a3780SDimitry Andric demangleType();
595344a3780SDimitry Andric break;
596344a3780SDimitry Andric case 'F':
597344a3780SDimitry Andric demangleFnSig();
598344a3780SDimitry Andric break;
599344a3780SDimitry Andric case 'D':
600344a3780SDimitry Andric demangleDynBounds();
601344a3780SDimitry Andric if (consumeIf('L')) {
602344a3780SDimitry Andric if (auto Lifetime = parseBase62Number()) {
603344a3780SDimitry Andric print(" + ");
604344a3780SDimitry Andric printLifetime(Lifetime);
605344a3780SDimitry Andric }
606344a3780SDimitry Andric } else {
607344a3780SDimitry Andric Error = true;
608344a3780SDimitry Andric }
609344a3780SDimitry Andric break;
610344a3780SDimitry Andric case 'B':
611344a3780SDimitry Andric demangleBackref([&] { demangleType(); });
612344a3780SDimitry Andric break;
613344a3780SDimitry Andric default:
614344a3780SDimitry Andric Position = Start;
615344a3780SDimitry Andric demanglePath(IsInType::Yes);
616344a3780SDimitry Andric break;
617344a3780SDimitry Andric }
618344a3780SDimitry Andric }
619344a3780SDimitry Andric
620344a3780SDimitry Andric // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621344a3780SDimitry Andric // <abi> = "C"
622344a3780SDimitry Andric // | <undisambiguated-identifier>
demangleFnSig()623344a3780SDimitry Andric void Demangler::demangleFnSig() {
624145449b1SDimitry Andric ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625344a3780SDimitry Andric demangleOptionalBinder();
626344a3780SDimitry Andric
627344a3780SDimitry Andric if (consumeIf('U'))
628344a3780SDimitry Andric print("unsafe ");
629344a3780SDimitry Andric
630344a3780SDimitry Andric if (consumeIf('K')) {
631344a3780SDimitry Andric print("extern \"");
632344a3780SDimitry Andric if (consumeIf('C')) {
633344a3780SDimitry Andric print("C");
634344a3780SDimitry Andric } else {
635344a3780SDimitry Andric Identifier Ident = parseIdentifier();
636c0981da4SDimitry Andric if (Ident.Punycode)
637c0981da4SDimitry Andric Error = true;
638344a3780SDimitry Andric for (char C : Ident.Name) {
639344a3780SDimitry Andric // When mangling ABI string, the "-" is replaced with "_".
640344a3780SDimitry Andric if (C == '_')
641344a3780SDimitry Andric C = '-';
642344a3780SDimitry Andric print(C);
643344a3780SDimitry Andric }
644344a3780SDimitry Andric }
645344a3780SDimitry Andric print("\" ");
646344a3780SDimitry Andric }
647344a3780SDimitry Andric
648344a3780SDimitry Andric print("fn(");
649344a3780SDimitry Andric for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
650344a3780SDimitry Andric if (I > 0)
651344a3780SDimitry Andric print(", ");
652344a3780SDimitry Andric demangleType();
653344a3780SDimitry Andric }
654344a3780SDimitry Andric print(")");
655344a3780SDimitry Andric
656344a3780SDimitry Andric if (consumeIf('u')) {
657344a3780SDimitry Andric // Skip the unit type from the output.
658344a3780SDimitry Andric } else {
659344a3780SDimitry Andric print(" -> ");
660344a3780SDimitry Andric demangleType();
661344a3780SDimitry Andric }
662344a3780SDimitry Andric }
663344a3780SDimitry Andric
664344a3780SDimitry Andric // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
demangleDynBounds()665344a3780SDimitry Andric void Demangler::demangleDynBounds() {
666145449b1SDimitry Andric ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667344a3780SDimitry Andric print("dyn ");
668344a3780SDimitry Andric demangleOptionalBinder();
669344a3780SDimitry Andric for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
670344a3780SDimitry Andric if (I > 0)
671344a3780SDimitry Andric print(" + ");
672344a3780SDimitry Andric demangleDynTrait();
673344a3780SDimitry Andric }
674344a3780SDimitry Andric }
675344a3780SDimitry Andric
676344a3780SDimitry Andric // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677344a3780SDimitry Andric // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
demangleDynTrait()678344a3780SDimitry Andric void Demangler::demangleDynTrait() {
679344a3780SDimitry Andric bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680344a3780SDimitry Andric while (!Error && consumeIf('p')) {
681344a3780SDimitry Andric if (!IsOpen) {
682344a3780SDimitry Andric IsOpen = true;
683344a3780SDimitry Andric print('<');
684344a3780SDimitry Andric } else {
685344a3780SDimitry Andric print(", ");
686344a3780SDimitry Andric }
687344a3780SDimitry Andric print(parseIdentifier().Name);
688344a3780SDimitry Andric print(" = ");
689344a3780SDimitry Andric demangleType();
690344a3780SDimitry Andric }
691344a3780SDimitry Andric if (IsOpen)
692344a3780SDimitry Andric print(">");
693344a3780SDimitry Andric }
694344a3780SDimitry Andric
695344a3780SDimitry Andric // Demangles optional binder and updates the number of bound lifetimes.
696344a3780SDimitry Andric //
697344a3780SDimitry Andric // <binder> = "G" <base-62-number>
demangleOptionalBinder()698344a3780SDimitry Andric void Demangler::demangleOptionalBinder() {
699344a3780SDimitry Andric uint64_t Binder = parseOptionalBase62Number('G');
700344a3780SDimitry Andric if (Error || Binder == 0)
701344a3780SDimitry Andric return;
702344a3780SDimitry Andric
703344a3780SDimitry Andric // In valid inputs each bound lifetime is referenced later. Referencing a
704344a3780SDimitry Andric // lifetime requires at least one byte of input. Reject inputs that are too
705344a3780SDimitry Andric // short to reference all bound lifetimes. Otherwise demangling of invalid
706344a3780SDimitry Andric // binders could generate excessive amounts of output.
707344a3780SDimitry Andric if (Binder >= Input.size() - BoundLifetimes) {
708344a3780SDimitry Andric Error = true;
709344a3780SDimitry Andric return;
710344a3780SDimitry Andric }
711344a3780SDimitry Andric
712344a3780SDimitry Andric print("for<");
713344a3780SDimitry Andric for (size_t I = 0; I != Binder; ++I) {
714344a3780SDimitry Andric BoundLifetimes += 1;
715344a3780SDimitry Andric if (I > 0)
716344a3780SDimitry Andric print(", ");
717344a3780SDimitry Andric printLifetime(1);
718344a3780SDimitry Andric }
719344a3780SDimitry Andric print("> ");
720344a3780SDimitry Andric }
721344a3780SDimitry Andric
722344a3780SDimitry Andric // <const> = <basic-type> <const-data>
723344a3780SDimitry Andric // | "p" // placeholder
724344a3780SDimitry Andric // | <backref>
demangleConst()725344a3780SDimitry Andric void Demangler::demangleConst() {
726344a3780SDimitry Andric if (Error || RecursionLevel >= MaxRecursionLevel) {
727344a3780SDimitry Andric Error = true;
728344a3780SDimitry Andric return;
729344a3780SDimitry Andric }
730145449b1SDimitry Andric ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731344a3780SDimitry Andric
732344a3780SDimitry Andric char C = consume();
733344a3780SDimitry Andric BasicType Type;
734344a3780SDimitry Andric if (parseBasicType(C, Type)) {
735344a3780SDimitry Andric switch (Type) {
736344a3780SDimitry Andric case BasicType::I8:
737344a3780SDimitry Andric case BasicType::I16:
738344a3780SDimitry Andric case BasicType::I32:
739344a3780SDimitry Andric case BasicType::I64:
740344a3780SDimitry Andric case BasicType::I128:
741344a3780SDimitry Andric case BasicType::ISize:
742344a3780SDimitry Andric case BasicType::U8:
743344a3780SDimitry Andric case BasicType::U16:
744344a3780SDimitry Andric case BasicType::U32:
745344a3780SDimitry Andric case BasicType::U64:
746344a3780SDimitry Andric case BasicType::U128:
747344a3780SDimitry Andric case BasicType::USize:
748344a3780SDimitry Andric demangleConstInt();
749344a3780SDimitry Andric break;
750344a3780SDimitry Andric case BasicType::Bool:
751344a3780SDimitry Andric demangleConstBool();
752344a3780SDimitry Andric break;
753344a3780SDimitry Andric case BasicType::Char:
754344a3780SDimitry Andric demangleConstChar();
755344a3780SDimitry Andric break;
756344a3780SDimitry Andric case BasicType::Placeholder:
757344a3780SDimitry Andric print('_');
758344a3780SDimitry Andric break;
759344a3780SDimitry Andric default:
760344a3780SDimitry Andric Error = true;
761344a3780SDimitry Andric break;
762344a3780SDimitry Andric }
763344a3780SDimitry Andric } else if (C == 'B') {
764344a3780SDimitry Andric demangleBackref([&] { demangleConst(); });
765344a3780SDimitry Andric } else {
766344a3780SDimitry Andric Error = true;
767344a3780SDimitry Andric }
768344a3780SDimitry Andric }
769344a3780SDimitry Andric
770344a3780SDimitry Andric // <const-data> = ["n"] <hex-number>
demangleConstInt()771344a3780SDimitry Andric void Demangler::demangleConstInt() {
772344a3780SDimitry Andric if (consumeIf('n'))
773344a3780SDimitry Andric print('-');
774344a3780SDimitry Andric
7757fa27ce4SDimitry Andric std::string_view HexDigits;
776344a3780SDimitry Andric uint64_t Value = parseHexNumber(HexDigits);
777344a3780SDimitry Andric if (HexDigits.size() <= 16) {
778344a3780SDimitry Andric printDecimalNumber(Value);
779344a3780SDimitry Andric } else {
780344a3780SDimitry Andric print("0x");
781344a3780SDimitry Andric print(HexDigits);
782344a3780SDimitry Andric }
783344a3780SDimitry Andric }
784344a3780SDimitry Andric
785344a3780SDimitry Andric // <const-data> = "0_" // false
786344a3780SDimitry Andric // | "1_" // true
demangleConstBool()787344a3780SDimitry Andric void Demangler::demangleConstBool() {
7887fa27ce4SDimitry Andric std::string_view HexDigits;
789344a3780SDimitry Andric parseHexNumber(HexDigits);
790344a3780SDimitry Andric if (HexDigits == "0")
791344a3780SDimitry Andric print("false");
792344a3780SDimitry Andric else if (HexDigits == "1")
793344a3780SDimitry Andric print("true");
794344a3780SDimitry Andric else
795344a3780SDimitry Andric Error = true;
796344a3780SDimitry Andric }
797344a3780SDimitry Andric
/// Returns true if CodePoint lies in the printable ASCII range, i.e. from the
/// space character (0x20) up to and including '~' (0x7e).
static bool isAsciiPrintable(uint64_t CodePoint) {
  return CodePoint >= 0x20 && CodePoint < 0x7f;
}
802344a3780SDimitry Andric
803344a3780SDimitry Andric // <const-data> = <hex-number>
demangleConstChar()804344a3780SDimitry Andric void Demangler::demangleConstChar() {
8057fa27ce4SDimitry Andric std::string_view HexDigits;
806344a3780SDimitry Andric uint64_t CodePoint = parseHexNumber(HexDigits);
807344a3780SDimitry Andric if (Error || HexDigits.size() > 6) {
808344a3780SDimitry Andric Error = true;
809344a3780SDimitry Andric return;
810344a3780SDimitry Andric }
811344a3780SDimitry Andric
812344a3780SDimitry Andric print("'");
813344a3780SDimitry Andric switch (CodePoint) {
814344a3780SDimitry Andric case '\t':
815344a3780SDimitry Andric print(R"(\t)");
816344a3780SDimitry Andric break;
817344a3780SDimitry Andric case '\r':
818344a3780SDimitry Andric print(R"(\r)");
819344a3780SDimitry Andric break;
820344a3780SDimitry Andric case '\n':
821344a3780SDimitry Andric print(R"(\n)");
822344a3780SDimitry Andric break;
823344a3780SDimitry Andric case '\\':
824344a3780SDimitry Andric print(R"(\\)");
825344a3780SDimitry Andric break;
826344a3780SDimitry Andric case '"':
827344a3780SDimitry Andric print(R"(")");
828344a3780SDimitry Andric break;
829344a3780SDimitry Andric case '\'':
830344a3780SDimitry Andric print(R"(\')");
831344a3780SDimitry Andric break;
832344a3780SDimitry Andric default:
833344a3780SDimitry Andric if (isAsciiPrintable(CodePoint)) {
834344a3780SDimitry Andric char C = CodePoint;
835344a3780SDimitry Andric print(C);
836344a3780SDimitry Andric } else {
837344a3780SDimitry Andric print(R"(\u{)");
838344a3780SDimitry Andric print(HexDigits);
839344a3780SDimitry Andric print('}');
840344a3780SDimitry Andric }
841344a3780SDimitry Andric break;
842344a3780SDimitry Andric }
843344a3780SDimitry Andric print('\'');
844344a3780SDimitry Andric }
845344a3780SDimitry Andric
846344a3780SDimitry Andric // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()847344a3780SDimitry Andric Identifier Demangler::parseIdentifier() {
848344a3780SDimitry Andric bool Punycode = consumeIf('u');
849344a3780SDimitry Andric uint64_t Bytes = parseDecimalNumber();
850344a3780SDimitry Andric
851344a3780SDimitry Andric // Underscore resolves the ambiguity when identifier starts with a decimal
852344a3780SDimitry Andric // digit or another underscore.
853344a3780SDimitry Andric consumeIf('_');
854344a3780SDimitry Andric
855344a3780SDimitry Andric if (Error || Bytes > Input.size() - Position) {
856344a3780SDimitry Andric Error = true;
857344a3780SDimitry Andric return {};
858344a3780SDimitry Andric }
8597fa27ce4SDimitry Andric std::string_view S = Input.substr(Position, Bytes);
860344a3780SDimitry Andric Position += Bytes;
861344a3780SDimitry Andric
862344a3780SDimitry Andric if (!std::all_of(S.begin(), S.end(), isValid)) {
863344a3780SDimitry Andric Error = true;
864344a3780SDimitry Andric return {};
865344a3780SDimitry Andric }
866344a3780SDimitry Andric
867344a3780SDimitry Andric return {S, Punycode};
868344a3780SDimitry Andric }
869344a3780SDimitry Andric
870344a3780SDimitry Andric // Parses optional base 62 number. The presence of a number is determined using
871344a3780SDimitry Andric // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872344a3780SDimitry Andric //
873344a3780SDimitry Andric // This function is indended for parsing disambiguators and binders which when
874344a3780SDimitry Andric // not present have their value interpreted as 0, and otherwise as decoded
875344a3780SDimitry Andric // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876344a3780SDimitry Andric // 2. When "G" is absent value is 0.
parseOptionalBase62Number(char Tag)877344a3780SDimitry Andric uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878344a3780SDimitry Andric if (!consumeIf(Tag))
879344a3780SDimitry Andric return 0;
880344a3780SDimitry Andric
881344a3780SDimitry Andric uint64_t N = parseBase62Number();
882344a3780SDimitry Andric if (Error || !addAssign(N, 1))
883344a3780SDimitry Andric return 0;
884344a3780SDimitry Andric
885344a3780SDimitry Andric return N;
886344a3780SDimitry Andric }
887344a3780SDimitry Andric
888344a3780SDimitry Andric // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889344a3780SDimitry Andric // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890344a3780SDimitry Andric // "1_" encodes 2, etc.
891344a3780SDimitry Andric //
892344a3780SDimitry Andric // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()893344a3780SDimitry Andric uint64_t Demangler::parseBase62Number() {
894344a3780SDimitry Andric if (consumeIf('_'))
895344a3780SDimitry Andric return 0;
896344a3780SDimitry Andric
897344a3780SDimitry Andric uint64_t Value = 0;
898344a3780SDimitry Andric
899344a3780SDimitry Andric while (true) {
900344a3780SDimitry Andric uint64_t Digit;
901344a3780SDimitry Andric char C = consume();
902344a3780SDimitry Andric
903344a3780SDimitry Andric if (C == '_') {
904344a3780SDimitry Andric break;
905344a3780SDimitry Andric } else if (isDigit(C)) {
906344a3780SDimitry Andric Digit = C - '0';
907344a3780SDimitry Andric } else if (isLower(C)) {
908344a3780SDimitry Andric Digit = 10 + (C - 'a');
909344a3780SDimitry Andric } else if (isUpper(C)) {
910344a3780SDimitry Andric Digit = 10 + 26 + (C - 'A');
911344a3780SDimitry Andric } else {
912344a3780SDimitry Andric Error = true;
913344a3780SDimitry Andric return 0;
914344a3780SDimitry Andric }
915344a3780SDimitry Andric
916344a3780SDimitry Andric if (!mulAssign(Value, 62))
917344a3780SDimitry Andric return 0;
918344a3780SDimitry Andric
919344a3780SDimitry Andric if (!addAssign(Value, Digit))
920344a3780SDimitry Andric return 0;
921344a3780SDimitry Andric }
922344a3780SDimitry Andric
923344a3780SDimitry Andric if (!addAssign(Value, 1))
924344a3780SDimitry Andric return 0;
925344a3780SDimitry Andric
926344a3780SDimitry Andric return Value;
927344a3780SDimitry Andric }
928344a3780SDimitry Andric
929344a3780SDimitry Andric // Parses a decimal number that had been encoded without any leading zeros.
930344a3780SDimitry Andric //
931344a3780SDimitry Andric // <decimal-number> = "0"
932344a3780SDimitry Andric // | <1-9> {<0-9>}
parseDecimalNumber()933344a3780SDimitry Andric uint64_t Demangler::parseDecimalNumber() {
934344a3780SDimitry Andric char C = look();
935344a3780SDimitry Andric if (!isDigit(C)) {
936344a3780SDimitry Andric Error = true;
937344a3780SDimitry Andric return 0;
938344a3780SDimitry Andric }
939344a3780SDimitry Andric
940344a3780SDimitry Andric if (C == '0') {
941344a3780SDimitry Andric consume();
942344a3780SDimitry Andric return 0;
943344a3780SDimitry Andric }
944344a3780SDimitry Andric
945344a3780SDimitry Andric uint64_t Value = 0;
946344a3780SDimitry Andric
947344a3780SDimitry Andric while (isDigit(look())) {
948344a3780SDimitry Andric if (!mulAssign(Value, 10)) {
949344a3780SDimitry Andric Error = true;
950344a3780SDimitry Andric return 0;
951344a3780SDimitry Andric }
952344a3780SDimitry Andric
953344a3780SDimitry Andric uint64_t D = consume() - '0';
954344a3780SDimitry Andric if (!addAssign(Value, D))
955344a3780SDimitry Andric return 0;
956344a3780SDimitry Andric }
957344a3780SDimitry Andric
958344a3780SDimitry Andric return Value;
959344a3780SDimitry Andric }
960344a3780SDimitry Andric
961344a3780SDimitry Andric // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962344a3780SDimitry Andric // value and stores hex digits in HexDigits. The return value is unspecified if
963344a3780SDimitry Andric // HexDigits.size() > 16.
964344a3780SDimitry Andric //
965344a3780SDimitry Andric // <hex-number> = "0_"
966344a3780SDimitry Andric // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(std::string_view & HexDigits)9677fa27ce4SDimitry Andric uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968344a3780SDimitry Andric size_t Start = Position;
969344a3780SDimitry Andric uint64_t Value = 0;
970344a3780SDimitry Andric
971344a3780SDimitry Andric if (!isHexDigit(look()))
972344a3780SDimitry Andric Error = true;
973344a3780SDimitry Andric
974344a3780SDimitry Andric if (consumeIf('0')) {
975344a3780SDimitry Andric if (!consumeIf('_'))
976344a3780SDimitry Andric Error = true;
977344a3780SDimitry Andric } else {
978344a3780SDimitry Andric while (!Error && !consumeIf('_')) {
979344a3780SDimitry Andric char C = consume();
980344a3780SDimitry Andric Value *= 16;
981344a3780SDimitry Andric if (isDigit(C))
982344a3780SDimitry Andric Value += C - '0';
983344a3780SDimitry Andric else if ('a' <= C && C <= 'f')
984344a3780SDimitry Andric Value += 10 + (C - 'a');
985344a3780SDimitry Andric else
986344a3780SDimitry Andric Error = true;
987344a3780SDimitry Andric }
988344a3780SDimitry Andric }
989344a3780SDimitry Andric
990344a3780SDimitry Andric if (Error) {
9917fa27ce4SDimitry Andric HexDigits = std::string_view();
992344a3780SDimitry Andric return 0;
993344a3780SDimitry Andric }
994344a3780SDimitry Andric
995344a3780SDimitry Andric size_t End = Position - 1;
996344a3780SDimitry Andric assert(Start < End);
997344a3780SDimitry Andric HexDigits = Input.substr(Start, End - Start);
998344a3780SDimitry Andric return Value;
999344a3780SDimitry Andric }
1000344a3780SDimitry Andric
print(char C)1001344a3780SDimitry Andric void Demangler::print(char C) {
1002344a3780SDimitry Andric if (Error || !Print)
1003344a3780SDimitry Andric return;
1004344a3780SDimitry Andric
1005344a3780SDimitry Andric Output += C;
1006344a3780SDimitry Andric }
1007344a3780SDimitry Andric
print(std::string_view S)10087fa27ce4SDimitry Andric void Demangler::print(std::string_view S) {
1009344a3780SDimitry Andric if (Error || !Print)
1010344a3780SDimitry Andric return;
1011344a3780SDimitry Andric
1012344a3780SDimitry Andric Output += S;
1013344a3780SDimitry Andric }
1014344a3780SDimitry Andric
printDecimalNumber(uint64_t N)1015344a3780SDimitry Andric void Demangler::printDecimalNumber(uint64_t N) {
1016344a3780SDimitry Andric if (Error || !Print)
1017344a3780SDimitry Andric return;
1018344a3780SDimitry Andric
1019344a3780SDimitry Andric Output << N;
1020344a3780SDimitry Andric }
1021344a3780SDimitry Andric
1022344a3780SDimitry Andric // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023344a3780SDimitry Andric // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024344a3780SDimitry Andric // bound by one of the enclosing binders.
printLifetime(uint64_t Index)1025344a3780SDimitry Andric void Demangler::printLifetime(uint64_t Index) {
1026344a3780SDimitry Andric if (Index == 0) {
1027344a3780SDimitry Andric print("'_");
1028344a3780SDimitry Andric return;
1029344a3780SDimitry Andric }
1030344a3780SDimitry Andric
1031344a3780SDimitry Andric if (Index - 1 >= BoundLifetimes) {
1032344a3780SDimitry Andric Error = true;
1033344a3780SDimitry Andric return;
1034344a3780SDimitry Andric }
1035344a3780SDimitry Andric
1036344a3780SDimitry Andric uint64_t Depth = BoundLifetimes - Index;
1037344a3780SDimitry Andric print('\'');
1038344a3780SDimitry Andric if (Depth < 26) {
1039344a3780SDimitry Andric char C = 'a' + Depth;
1040344a3780SDimitry Andric print(C);
1041344a3780SDimitry Andric } else {
1042344a3780SDimitry Andric print('z');
1043344a3780SDimitry Andric printDecimalNumber(Depth - 26 + 1);
1044344a3780SDimitry Andric }
1045344a3780SDimitry Andric }
1046344a3780SDimitry Andric
decodePunycodeDigit(char C,size_t & Value)1047c0981da4SDimitry Andric static inline bool decodePunycodeDigit(char C, size_t &Value) {
1048c0981da4SDimitry Andric if (isLower(C)) {
1049c0981da4SDimitry Andric Value = C - 'a';
1050c0981da4SDimitry Andric return true;
1051c0981da4SDimitry Andric }
1052c0981da4SDimitry Andric
1053c0981da4SDimitry Andric if (isDigit(C)) {
1054c0981da4SDimitry Andric Value = 26 + (C - '0');
1055c0981da4SDimitry Andric return true;
1056c0981da4SDimitry Andric }
1057c0981da4SDimitry Andric
1058c0981da4SDimitry Andric return false;
1059c0981da4SDimitry Andric }
1060c0981da4SDimitry Andric
removeNullBytes(OutputBuffer & Output,size_t StartIdx)1061c0981da4SDimitry Andric static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1062c0981da4SDimitry Andric char *Buffer = Output.getBuffer();
1063c0981da4SDimitry Andric char *Start = Buffer + StartIdx;
1064c0981da4SDimitry Andric char *End = Buffer + Output.getCurrentPosition();
1065c0981da4SDimitry Andric Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1066c0981da4SDimitry Andric }
1067c0981da4SDimitry Andric
// Encodes code point as UTF-8 and stores results in Output. Returns false if
// CodePoint is not a valid unicode scalar value, i.e. when it is a surrogate
// (0xD800-0xDFFF) or greater than 0x10FFFF; Output is not written in that
// case. Between one and four bytes are written depending on the magnitude of
// CodePoint; the remaining bytes of Output are left untouched.
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  const bool IsSurrogate = CodePoint >= 0xD800 && CodePoint <= 0xDFFF;
  if (IsSurrogate || CodePoint > 0x10FFFF)
    return false;

  if (CodePoint < 0x80) {
    // Single byte: plain ASCII.
    Output[0] = static_cast<char>(CodePoint);
  } else if (CodePoint < 0x800) {
    // Two bytes: 110xxxxx 10xxxxxx.
    Output[0] = static_cast<char>(0xC0 | ((CodePoint >> 6) & 0x3F));
    Output[1] = static_cast<char>(0x80 | (CodePoint & 0x3F));
  } else if (CodePoint < 0x10000) {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    Output[0] = static_cast<char>(0xE0 | (CodePoint >> 12));
    Output[1] = static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
    Output[2] = static_cast<char>(0x80 | (CodePoint & 0x3F));
  } else {
    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    Output[0] = static_cast<char>(0xF0 | (CodePoint >> 18));
    Output[1] = static_cast<char>(0x80 | ((CodePoint >> 12) & 0x3F));
    Output[2] = static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
    Output[3] = static_cast<char>(0x80 | (CodePoint & 0x3F));
  }
  return true;
}
1102c0981da4SDimitry Andric
1103c0981da4SDimitry Andric // Decodes string encoded using punycode and appends results to Output.
1104c0981da4SDimitry Andric // Returns true if decoding was successful.
decodePunycode(std::string_view Input,OutputBuffer & Output)11057fa27ce4SDimitry Andric static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1106c0981da4SDimitry Andric size_t OutputSize = Output.getCurrentPosition();
1107c0981da4SDimitry Andric size_t InputIdx = 0;
1108c0981da4SDimitry Andric
1109c0981da4SDimitry Andric // Rust uses an underscore as a delimiter.
11107fa27ce4SDimitry Andric size_t DelimiterPos = std::string_view::npos;
1111c0981da4SDimitry Andric for (size_t I = 0; I != Input.size(); ++I)
1112c0981da4SDimitry Andric if (Input[I] == '_')
1113c0981da4SDimitry Andric DelimiterPos = I;
1114c0981da4SDimitry Andric
11157fa27ce4SDimitry Andric if (DelimiterPos != std::string_view::npos) {
1116c0981da4SDimitry Andric // Copy basic code points before the last delimiter to the output.
1117c0981da4SDimitry Andric for (; InputIdx != DelimiterPos; ++InputIdx) {
1118c0981da4SDimitry Andric char C = Input[InputIdx];
1119c0981da4SDimitry Andric if (!isValid(C))
1120c0981da4SDimitry Andric return false;
1121c0981da4SDimitry Andric // Code points are padded with zeros while decoding is in progress.
1122c0981da4SDimitry Andric char UTF8[4] = {C};
11237fa27ce4SDimitry Andric Output += std::string_view(UTF8, 4);
1124c0981da4SDimitry Andric }
1125c0981da4SDimitry Andric // Skip over the delimiter.
1126c0981da4SDimitry Andric ++InputIdx;
1127c0981da4SDimitry Andric }
1128c0981da4SDimitry Andric
1129c0981da4SDimitry Andric size_t Base = 36;
1130c0981da4SDimitry Andric size_t Skew = 38;
1131c0981da4SDimitry Andric size_t Bias = 72;
1132c0981da4SDimitry Andric size_t N = 0x80;
1133c0981da4SDimitry Andric size_t TMin = 1;
1134c0981da4SDimitry Andric size_t TMax = 26;
1135c0981da4SDimitry Andric size_t Damp = 700;
1136c0981da4SDimitry Andric
1137c0981da4SDimitry Andric auto Adapt = [&](size_t Delta, size_t NumPoints) {
1138c0981da4SDimitry Andric Delta /= Damp;
1139c0981da4SDimitry Andric Delta += Delta / NumPoints;
1140c0981da4SDimitry Andric Damp = 2;
1141c0981da4SDimitry Andric
1142c0981da4SDimitry Andric size_t K = 0;
1143c0981da4SDimitry Andric while (Delta > (Base - TMin) * TMax / 2) {
1144c0981da4SDimitry Andric Delta /= Base - TMin;
1145c0981da4SDimitry Andric K += Base;
1146c0981da4SDimitry Andric }
1147c0981da4SDimitry Andric return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148c0981da4SDimitry Andric };
1149c0981da4SDimitry Andric
1150c0981da4SDimitry Andric // Main decoding loop.
1151c0981da4SDimitry Andric for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1152c0981da4SDimitry Andric size_t OldI = I;
1153c0981da4SDimitry Andric size_t W = 1;
1154c0981da4SDimitry Andric size_t Max = std::numeric_limits<size_t>::max();
1155c0981da4SDimitry Andric for (size_t K = Base; true; K += Base) {
1156c0981da4SDimitry Andric if (InputIdx == Input.size())
1157c0981da4SDimitry Andric return false;
1158c0981da4SDimitry Andric char C = Input[InputIdx++];
1159c0981da4SDimitry Andric size_t Digit = 0;
1160c0981da4SDimitry Andric if (!decodePunycodeDigit(C, Digit))
1161c0981da4SDimitry Andric return false;
1162c0981da4SDimitry Andric
1163c0981da4SDimitry Andric if (Digit > (Max - I) / W)
1164c0981da4SDimitry Andric return false;
1165c0981da4SDimitry Andric I += Digit * W;
1166c0981da4SDimitry Andric
1167c0981da4SDimitry Andric size_t T;
1168c0981da4SDimitry Andric if (K <= Bias)
1169c0981da4SDimitry Andric T = TMin;
1170c0981da4SDimitry Andric else if (K >= Bias + TMax)
1171c0981da4SDimitry Andric T = TMax;
1172c0981da4SDimitry Andric else
1173c0981da4SDimitry Andric T = K - Bias;
1174c0981da4SDimitry Andric
1175c0981da4SDimitry Andric if (Digit < T)
1176c0981da4SDimitry Andric break;
1177c0981da4SDimitry Andric
1178c0981da4SDimitry Andric if (W > Max / (Base - T))
1179c0981da4SDimitry Andric return false;
1180c0981da4SDimitry Andric W *= (Base - T);
1181c0981da4SDimitry Andric }
1182c0981da4SDimitry Andric size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183c0981da4SDimitry Andric Bias = Adapt(I - OldI, NumPoints);
1184c0981da4SDimitry Andric
1185c0981da4SDimitry Andric if (I / NumPoints > Max - N)
1186c0981da4SDimitry Andric return false;
1187c0981da4SDimitry Andric N += I / NumPoints;
1188c0981da4SDimitry Andric I = I % NumPoints;
1189c0981da4SDimitry Andric
1190c0981da4SDimitry Andric // Insert N at position I in the output.
1191c0981da4SDimitry Andric char UTF8[4] = {};
1192c0981da4SDimitry Andric if (!encodeUTF8(N, UTF8))
1193c0981da4SDimitry Andric return false;
1194c0981da4SDimitry Andric Output.insert(OutputSize + I * 4, UTF8, 4);
1195c0981da4SDimitry Andric }
1196c0981da4SDimitry Andric
1197c0981da4SDimitry Andric removeNullBytes(Output, OutputSize);
1198c0981da4SDimitry Andric return true;
1199c0981da4SDimitry Andric }
1200c0981da4SDimitry Andric
printIdentifier(Identifier Ident)1201c0981da4SDimitry Andric void Demangler::printIdentifier(Identifier Ident) {
1202c0981da4SDimitry Andric if (Error || !Print)
1203c0981da4SDimitry Andric return;
1204c0981da4SDimitry Andric
1205c0981da4SDimitry Andric if (Ident.Punycode) {
1206c0981da4SDimitry Andric if (!decodePunycode(Ident.Name, Output))
1207c0981da4SDimitry Andric Error = true;
1208c0981da4SDimitry Andric } else {
1209c0981da4SDimitry Andric print(Ident.Name);
1210c0981da4SDimitry Andric }
1211c0981da4SDimitry Andric }
1212c0981da4SDimitry Andric
look() const1213344a3780SDimitry Andric char Demangler::look() const {
1214344a3780SDimitry Andric if (Error || Position >= Input.size())
1215344a3780SDimitry Andric return 0;
1216344a3780SDimitry Andric
1217344a3780SDimitry Andric return Input[Position];
1218344a3780SDimitry Andric }
1219344a3780SDimitry Andric
consume()1220344a3780SDimitry Andric char Demangler::consume() {
1221344a3780SDimitry Andric if (Error || Position >= Input.size()) {
1222344a3780SDimitry Andric Error = true;
1223344a3780SDimitry Andric return 0;
1224344a3780SDimitry Andric }
1225344a3780SDimitry Andric
1226344a3780SDimitry Andric return Input[Position++];
1227344a3780SDimitry Andric }
1228344a3780SDimitry Andric
consumeIf(char Prefix)1229344a3780SDimitry Andric bool Demangler::consumeIf(char Prefix) {
1230344a3780SDimitry Andric if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231344a3780SDimitry Andric return false;
1232344a3780SDimitry Andric
1233344a3780SDimitry Andric Position += 1;
1234344a3780SDimitry Andric return true;
1235344a3780SDimitry Andric }
1236344a3780SDimitry Andric
1237344a3780SDimitry Andric /// Computes A + B. When computation wraps around sets the error and returns
1238344a3780SDimitry Andric /// false. Otherwise assigns the result to A and returns true.
addAssign(uint64_t & A,uint64_t B)1239344a3780SDimitry Andric bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1240344a3780SDimitry Andric if (A > std::numeric_limits<uint64_t>::max() - B) {
1241344a3780SDimitry Andric Error = true;
1242344a3780SDimitry Andric return false;
1243344a3780SDimitry Andric }
1244344a3780SDimitry Andric
1245344a3780SDimitry Andric A += B;
1246344a3780SDimitry Andric return true;
1247344a3780SDimitry Andric }
1248344a3780SDimitry Andric
1249344a3780SDimitry Andric /// Computes A * B. When computation wraps around sets the error and returns
1250344a3780SDimitry Andric /// false. Otherwise assigns the result to A and returns true.
mulAssign(uint64_t & A,uint64_t B)1251344a3780SDimitry Andric bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1252344a3780SDimitry Andric if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1253344a3780SDimitry Andric Error = true;
1254344a3780SDimitry Andric return false;
1255344a3780SDimitry Andric }
1256344a3780SDimitry Andric
1257344a3780SDimitry Andric A *= B;
1258344a3780SDimitry Andric return true;
1259344a3780SDimitry Andric }
1260