12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295 |
- //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines a demangler for Rust v0 mangled symbols as specified in
- // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Demangle/Demangle.h"
- #include "llvm/Demangle/StringView.h"
- #include "llvm/Demangle/Utility.h"
- #include <algorithm>
- #include <cassert>
- #include <cstdint>
- #include <cstring>
- #include <limits>
- using namespace llvm;
- using llvm::itanium_demangle::OutputBuffer;
- using llvm::itanium_demangle::StringView;
- using llvm::itanium_demangle::SwapAndRestore;
- namespace {
- struct Identifier {
- StringView Name;
- bool Punycode;
- bool empty() const { return Name.empty(); }
- };
- enum class BasicType {
- Bool,
- Char,
- I8,
- I16,
- I32,
- I64,
- I128,
- ISize,
- U8,
- U16,
- U32,
- U64,
- U128,
- USize,
- F32,
- F64,
- Str,
- Placeholder,
- Unit,
- Variadic,
- Never,
- };
- enum class IsInType {
- No,
- Yes,
- };
- enum class LeaveGenericsOpen {
- No,
- Yes,
- };
- class Demangler {
- // Maximum recursion level. Used to avoid stack overflow.
- size_t MaxRecursionLevel;
- // Current recursion level.
- size_t RecursionLevel;
- size_t BoundLifetimes;
- // Input string that is being demangled with "_R" prefix removed.
- StringView Input;
- // Position in the input string.
- size_t Position;
- // When true, print methods append the output to the stream.
- // When false, the output is suppressed.
- bool Print;
- // True if an error occurred.
- bool Error;
- public:
- // Demangled output.
- OutputBuffer Output;
- Demangler(size_t MaxRecursionLevel = 500);
- bool demangle(StringView MangledName);
- private:
- bool demanglePath(IsInType Type,
- LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
- void demangleImplPath(IsInType InType);
- void demangleGenericArg();
- void demangleType();
- void demangleFnSig();
- void demangleDynBounds();
- void demangleDynTrait();
- void demangleOptionalBinder();
- void demangleConst();
- void demangleConstInt();
- void demangleConstBool();
- void demangleConstChar();
- template <typename Callable> void demangleBackref(Callable Demangler) {
- uint64_t Backref = parseBase62Number();
- if (Error || Backref >= Position) {
- Error = true;
- return;
- }
- if (!Print)
- return;
- SwapAndRestore<size_t> SavePosition(Position, Position);
- Position = Backref;
- Demangler();
- }
- Identifier parseIdentifier();
- uint64_t parseOptionalBase62Number(char Tag);
- uint64_t parseBase62Number();
- uint64_t parseDecimalNumber();
- uint64_t parseHexNumber(StringView &HexDigits);
- void print(char C);
- void print(StringView S);
- void printDecimalNumber(uint64_t N);
- void printBasicType(BasicType);
- void printLifetime(uint64_t Index);
- void printIdentifier(Identifier Ident);
- char look() const;
- char consume();
- bool consumeIf(char Prefix);
- bool addAssign(uint64_t &A, uint64_t B);
- bool mulAssign(uint64_t &A, uint64_t B);
- };
- } // namespace
- char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
- int *Status) {
- if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
- if (Status != nullptr)
- *Status = demangle_invalid_args;
- return nullptr;
- }
- // Return early if mangled name doesn't look like a Rust symbol.
- StringView Mangled(MangledName);
- if (!Mangled.startsWith("_R")) {
- if (Status != nullptr)
- *Status = demangle_invalid_mangled_name;
- return nullptr;
- }
- Demangler D;
- if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
- if (Status != nullptr)
- *Status = demangle_memory_alloc_failure;
- return nullptr;
- }
- if (!D.demangle(Mangled)) {
- if (Status != nullptr)
- *Status = demangle_invalid_mangled_name;
- std::free(D.Output.getBuffer());
- return nullptr;
- }
- D.Output += '\0';
- char *Demangled = D.Output.getBuffer();
- size_t DemangledLen = D.Output.getCurrentPosition();
- if (Buf != nullptr) {
- if (DemangledLen <= *N) {
- std::memcpy(Buf, Demangled, DemangledLen);
- std::free(Demangled);
- Demangled = Buf;
- } else {
- std::free(Buf);
- }
- }
- if (N != nullptr)
- *N = DemangledLen;
- if (Status != nullptr)
- *Status = demangle_success;
- return Demangled;
- }
- Demangler::Demangler(size_t MaxRecursionLevel)
- : MaxRecursionLevel(MaxRecursionLevel) {}
- static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
- static inline bool isHexDigit(const char C) {
- return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
- }
- static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
- static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
- /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
- static inline bool isValid(const char C) {
- return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
- }
- // Demangles Rust v0 mangled symbol. Returns true when successful, and false
- // otherwise. The demangled symbol is stored in Output field. It is
- // responsibility of the caller to free the memory behind the output stream.
- //
- // <symbol-name> = "_R" <path> [<instantiating-crate>]
- bool Demangler::demangle(StringView Mangled) {
- Position = 0;
- Error = false;
- Print = true;
- RecursionLevel = 0;
- BoundLifetimes = 0;
- if (!Mangled.consumeFront("_R")) {
- Error = true;
- return false;
- }
- size_t Dot = Mangled.find('.');
- Input = Mangled.substr(0, Dot);
- StringView Suffix = Mangled.dropFront(Dot);
- demanglePath(IsInType::No);
- if (Position != Input.size()) {
- SwapAndRestore<bool> SavePrint(Print, false);
- demanglePath(IsInType::No);
- }
- if (Position != Input.size())
- Error = true;
- if (!Suffix.empty()) {
- print(" (");
- print(Suffix);
- print(")");
- }
- return !Error;
- }
- // Demangles a path. InType indicates whether a path is inside a type. When
- // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
- // output. Return value indicates whether generics arguments have been left
- // open.
- //
- // <path> = "C" <identifier> // crate root
- // | "M" <impl-path> <type> // <T> (inherent impl)
- // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
- // | "Y" <type> <path> // <T as Trait> (trait definition)
- // | "N" <ns> <path> <identifier> // ...::ident (nested path)
- // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
- // | <backref>
- // <identifier> = [<disambiguator>] <undisambiguated-identifier>
- // <ns> = "C" // closure
- // | "S" // shim
- // | <A-Z> // other special namespaces
- // | <a-z> // internal namespaces
- bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
- if (Error || RecursionLevel >= MaxRecursionLevel) {
- Error = true;
- return false;
- }
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
- switch (consume()) {
- case 'C': {
- parseOptionalBase62Number('s');
- printIdentifier(parseIdentifier());
- break;
- }
- case 'M': {
- demangleImplPath(InType);
- print("<");
- demangleType();
- print(">");
- break;
- }
- case 'X': {
- demangleImplPath(InType);
- print("<");
- demangleType();
- print(" as ");
- demanglePath(IsInType::Yes);
- print(">");
- break;
- }
- case 'Y': {
- print("<");
- demangleType();
- print(" as ");
- demanglePath(IsInType::Yes);
- print(">");
- break;
- }
- case 'N': {
- char NS = consume();
- if (!isLower(NS) && !isUpper(NS)) {
- Error = true;
- break;
- }
- demanglePath(InType);
- uint64_t Disambiguator = parseOptionalBase62Number('s');
- Identifier Ident = parseIdentifier();
- if (isUpper(NS)) {
- // Special namespaces
- print("::{");
- if (NS == 'C')
- print("closure");
- else if (NS == 'S')
- print("shim");
- else
- print(NS);
- if (!Ident.empty()) {
- print(":");
- printIdentifier(Ident);
- }
- print('#');
- printDecimalNumber(Disambiguator);
- print('}');
- } else {
- // Implementation internal namespaces.
- if (!Ident.empty()) {
- print("::");
- printIdentifier(Ident);
- }
- }
- break;
- }
- case 'I': {
- demanglePath(InType);
- // Omit "::" when in a type, where it is optional.
- if (InType == IsInType::No)
- print("::");
- print("<");
- for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
- if (I > 0)
- print(", ");
- demangleGenericArg();
- }
- if (LeaveOpen == LeaveGenericsOpen::Yes)
- return true;
- else
- print(">");
- break;
- }
- case 'B': {
- bool IsOpen = false;
- demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
- return IsOpen;
- }
- default:
- Error = true;
- break;
- }
- return false;
- }
- // <impl-path> = [<disambiguator>] <path>
- // <disambiguator> = "s" <base-62-number>
- void Demangler::demangleImplPath(IsInType InType) {
- SwapAndRestore<bool> SavePrint(Print, false);
- parseOptionalBase62Number('s');
- demanglePath(InType);
- }
- // <generic-arg> = <lifetime>
- // | <type>
- // | "K" <const>
- // <lifetime> = "L" <base-62-number>
- void Demangler::demangleGenericArg() {
- if (consumeIf('L'))
- printLifetime(parseBase62Number());
- else if (consumeIf('K'))
- demangleConst();
- else
- demangleType();
- }
- // <basic-type> = "a" // i8
- // | "b" // bool
- // | "c" // char
- // | "d" // f64
- // | "e" // str
- // | "f" // f32
- // | "h" // u8
- // | "i" // isize
- // | "j" // usize
- // | "l" // i32
- // | "m" // u32
- // | "n" // i128
- // | "o" // u128
- // | "s" // i16
- // | "t" // u16
- // | "u" // ()
- // | "v" // ...
- // | "x" // i64
- // | "y" // u64
- // | "z" // !
- // | "p" // placeholder (e.g. for generic params), shown as _
- static bool parseBasicType(char C, BasicType &Type) {
- switch (C) {
- case 'a':
- Type = BasicType::I8;
- return true;
- case 'b':
- Type = BasicType::Bool;
- return true;
- case 'c':
- Type = BasicType::Char;
- return true;
- case 'd':
- Type = BasicType::F64;
- return true;
- case 'e':
- Type = BasicType::Str;
- return true;
- case 'f':
- Type = BasicType::F32;
- return true;
- case 'h':
- Type = BasicType::U8;
- return true;
- case 'i':
- Type = BasicType::ISize;
- return true;
- case 'j':
- Type = BasicType::USize;
- return true;
- case 'l':
- Type = BasicType::I32;
- return true;
- case 'm':
- Type = BasicType::U32;
- return true;
- case 'n':
- Type = BasicType::I128;
- return true;
- case 'o':
- Type = BasicType::U128;
- return true;
- case 'p':
- Type = BasicType::Placeholder;
- return true;
- case 's':
- Type = BasicType::I16;
- return true;
- case 't':
- Type = BasicType::U16;
- return true;
- case 'u':
- Type = BasicType::Unit;
- return true;
- case 'v':
- Type = BasicType::Variadic;
- return true;
- case 'x':
- Type = BasicType::I64;
- return true;
- case 'y':
- Type = BasicType::U64;
- return true;
- case 'z':
- Type = BasicType::Never;
- return true;
- default:
- return false;
- }
- }
- void Demangler::printBasicType(BasicType Type) {
- switch (Type) {
- case BasicType::Bool:
- print("bool");
- break;
- case BasicType::Char:
- print("char");
- break;
- case BasicType::I8:
- print("i8");
- break;
- case BasicType::I16:
- print("i16");
- break;
- case BasicType::I32:
- print("i32");
- break;
- case BasicType::I64:
- print("i64");
- break;
- case BasicType::I128:
- print("i128");
- break;
- case BasicType::ISize:
- print("isize");
- break;
- case BasicType::U8:
- print("u8");
- break;
- case BasicType::U16:
- print("u16");
- break;
- case BasicType::U32:
- print("u32");
- break;
- case BasicType::U64:
- print("u64");
- break;
- case BasicType::U128:
- print("u128");
- break;
- case BasicType::USize:
- print("usize");
- break;
- case BasicType::F32:
- print("f32");
- break;
- case BasicType::F64:
- print("f64");
- break;
- case BasicType::Str:
- print("str");
- break;
- case BasicType::Placeholder:
- print("_");
- break;
- case BasicType::Unit:
- print("()");
- break;
- case BasicType::Variadic:
- print("...");
- break;
- case BasicType::Never:
- print("!");
- break;
- }
- }
- // <type> = | <basic-type>
- // | <path> // named type
- // | "A" <type> <const> // [T; N]
- // | "S" <type> // [T]
- // | "T" {<type>} "E" // (T1, T2, T3, ...)
- // | "R" [<lifetime>] <type> // &T
- // | "Q" [<lifetime>] <type> // &mut T
- // | "P" <type> // *const T
- // | "O" <type> // *mut T
- // | "F" <fn-sig> // fn(...) -> ...
- // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
- // | <backref> // backref
- void Demangler::demangleType() {
- if (Error || RecursionLevel >= MaxRecursionLevel) {
- Error = true;
- return;
- }
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
- size_t Start = Position;
- char C = consume();
- BasicType Type;
- if (parseBasicType(C, Type))
- return printBasicType(Type);
- switch (C) {
- case 'A':
- print("[");
- demangleType();
- print("; ");
- demangleConst();
- print("]");
- break;
- case 'S':
- print("[");
- demangleType();
- print("]");
- break;
- case 'T': {
- print("(");
- size_t I = 0;
- for (; !Error && !consumeIf('E'); ++I) {
- if (I > 0)
- print(", ");
- demangleType();
- }
- if (I == 1)
- print(",");
- print(")");
- break;
- }
- case 'R':
- case 'Q':
- print('&');
- if (consumeIf('L')) {
- if (auto Lifetime = parseBase62Number()) {
- printLifetime(Lifetime);
- print(' ');
- }
- }
- if (C == 'Q')
- print("mut ");
- demangleType();
- break;
- case 'P':
- print("*const ");
- demangleType();
- break;
- case 'O':
- print("*mut ");
- demangleType();
- break;
- case 'F':
- demangleFnSig();
- break;
- case 'D':
- demangleDynBounds();
- if (consumeIf('L')) {
- if (auto Lifetime = parseBase62Number()) {
- print(" + ");
- printLifetime(Lifetime);
- }
- } else {
- Error = true;
- }
- break;
- case 'B':
- demangleBackref([&] { demangleType(); });
- break;
- default:
- Position = Start;
- demanglePath(IsInType::Yes);
- break;
- }
- }
- // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
- // <abi> = "C"
- // | <undisambiguated-identifier>
- void Demangler::demangleFnSig() {
- SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
- demangleOptionalBinder();
- if (consumeIf('U'))
- print("unsafe ");
- if (consumeIf('K')) {
- print("extern \"");
- if (consumeIf('C')) {
- print("C");
- } else {
- Identifier Ident = parseIdentifier();
- if (Ident.Punycode)
- Error = true;
- for (char C : Ident.Name) {
- // When mangling ABI string, the "-" is replaced with "_".
- if (C == '_')
- C = '-';
- print(C);
- }
- }
- print("\" ");
- }
- print("fn(");
- for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
- if (I > 0)
- print(", ");
- demangleType();
- }
- print(")");
- if (consumeIf('u')) {
- // Skip the unit type from the output.
- } else {
- print(" -> ");
- demangleType();
- }
- }
- // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
- void Demangler::demangleDynBounds() {
- SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
- print("dyn ");
- demangleOptionalBinder();
- for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
- if (I > 0)
- print(" + ");
- demangleDynTrait();
- }
- }
- // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
- // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
- void Demangler::demangleDynTrait() {
- bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
- while (!Error && consumeIf('p')) {
- if (!IsOpen) {
- IsOpen = true;
- print('<');
- } else {
- print(", ");
- }
- print(parseIdentifier().Name);
- print(" = ");
- demangleType();
- }
- if (IsOpen)
- print(">");
- }
- // Demangles optional binder and updates the number of bound lifetimes.
- //
- // <binder> = "G" <base-62-number>
- void Demangler::demangleOptionalBinder() {
- uint64_t Binder = parseOptionalBase62Number('G');
- if (Error || Binder == 0)
- return;
- // In valid inputs each bound lifetime is referenced later. Referencing a
- // lifetime requires at least one byte of input. Reject inputs that are too
- // short to reference all bound lifetimes. Otherwise demangling of invalid
- // binders could generate excessive amounts of output.
- if (Binder >= Input.size() - BoundLifetimes) {
- Error = true;
- return;
- }
- print("for<");
- for (size_t I = 0; I != Binder; ++I) {
- BoundLifetimes += 1;
- if (I > 0)
- print(", ");
- printLifetime(1);
- }
- print("> ");
- }
- // <const> = <basic-type> <const-data>
- // | "p" // placeholder
- // | <backref>
- void Demangler::demangleConst() {
- if (Error || RecursionLevel >= MaxRecursionLevel) {
- Error = true;
- return;
- }
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
- char C = consume();
- BasicType Type;
- if (parseBasicType(C, Type)) {
- switch (Type) {
- case BasicType::I8:
- case BasicType::I16:
- case BasicType::I32:
- case BasicType::I64:
- case BasicType::I128:
- case BasicType::ISize:
- case BasicType::U8:
- case BasicType::U16:
- case BasicType::U32:
- case BasicType::U64:
- case BasicType::U128:
- case BasicType::USize:
- demangleConstInt();
- break;
- case BasicType::Bool:
- demangleConstBool();
- break;
- case BasicType::Char:
- demangleConstChar();
- break;
- case BasicType::Placeholder:
- print('_');
- break;
- default:
- Error = true;
- break;
- }
- } else if (C == 'B') {
- demangleBackref([&] { demangleConst(); });
- } else {
- Error = true;
- }
- }
- // <const-data> = ["n"] <hex-number>
- void Demangler::demangleConstInt() {
- if (consumeIf('n'))
- print('-');
- StringView HexDigits;
- uint64_t Value = parseHexNumber(HexDigits);
- if (HexDigits.size() <= 16) {
- printDecimalNumber(Value);
- } else {
- print("0x");
- print(HexDigits);
- }
- }
- // <const-data> = "0_" // false
- // | "1_" // true
- void Demangler::demangleConstBool() {
- StringView HexDigits;
- parseHexNumber(HexDigits);
- if (HexDigits == "0")
- print("false");
- else if (HexDigits == "1")
- print("true");
- else
- Error = true;
- }
- /// Returns true if CodePoint represents a printable ASCII character.
- static bool isAsciiPrintable(uint64_t CodePoint) {
- return 0x20 <= CodePoint && CodePoint <= 0x7e;
- }
- // <const-data> = <hex-number>
- void Demangler::demangleConstChar() {
- StringView HexDigits;
- uint64_t CodePoint = parseHexNumber(HexDigits);
- if (Error || HexDigits.size() > 6) {
- Error = true;
- return;
- }
- print("'");
- switch (CodePoint) {
- case '\t':
- print(R"(\t)");
- break;
- case '\r':
- print(R"(\r)");
- break;
- case '\n':
- print(R"(\n)");
- break;
- case '\\':
- print(R"(\\)");
- break;
- case '"':
- print(R"(")");
- break;
- case '\'':
- print(R"(\')");
- break;
- default:
- if (isAsciiPrintable(CodePoint)) {
- char C = CodePoint;
- print(C);
- } else {
- print(R"(\u{)");
- print(HexDigits);
- print('}');
- }
- break;
- }
- print('\'');
- }
- // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
- Identifier Demangler::parseIdentifier() {
- bool Punycode = consumeIf('u');
- uint64_t Bytes = parseDecimalNumber();
- // Underscore resolves the ambiguity when identifier starts with a decimal
- // digit or another underscore.
- consumeIf('_');
- if (Error || Bytes > Input.size() - Position) {
- Error = true;
- return {};
- }
- StringView S = Input.substr(Position, Bytes);
- Position += Bytes;
- if (!std::all_of(S.begin(), S.end(), isValid)) {
- Error = true;
- return {};
- }
- return {S, Punycode};
- }
- // Parses optional base 62 number. The presence of a number is determined using
- // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
- //
- // This function is indended for parsing disambiguators and binders which when
- // not present have their value interpreted as 0, and otherwise as decoded
- // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
- // 2. When "G" is absent value is 0.
- uint64_t Demangler::parseOptionalBase62Number(char Tag) {
- if (!consumeIf(Tag))
- return 0;
- uint64_t N = parseBase62Number();
- if (Error || !addAssign(N, 1))
- return 0;
- return N;
- }
- // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
- // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
- // "1_" encodes 2, etc.
- //
- // <base-62-number> = {<0-9a-zA-Z>} "_"
- uint64_t Demangler::parseBase62Number() {
- if (consumeIf('_'))
- return 0;
- uint64_t Value = 0;
- while (true) {
- uint64_t Digit;
- char C = consume();
- if (C == '_') {
- break;
- } else if (isDigit(C)) {
- Digit = C - '0';
- } else if (isLower(C)) {
- Digit = 10 + (C - 'a');
- } else if (isUpper(C)) {
- Digit = 10 + 26 + (C - 'A');
- } else {
- Error = true;
- return 0;
- }
- if (!mulAssign(Value, 62))
- return 0;
- if (!addAssign(Value, Digit))
- return 0;
- }
- if (!addAssign(Value, 1))
- return 0;
- return Value;
- }
- // Parses a decimal number that had been encoded without any leading zeros.
- //
- // <decimal-number> = "0"
- // | <1-9> {<0-9>}
- uint64_t Demangler::parseDecimalNumber() {
- char C = look();
- if (!isDigit(C)) {
- Error = true;
- return 0;
- }
- if (C == '0') {
- consume();
- return 0;
- }
- uint64_t Value = 0;
- while (isDigit(look())) {
- if (!mulAssign(Value, 10)) {
- Error = true;
- return 0;
- }
- uint64_t D = consume() - '0';
- if (!addAssign(Value, D))
- return 0;
- }
- return Value;
- }
- // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
- // value and stores hex digits in HexDigits. The return value is unspecified if
- // HexDigits.size() > 16.
- //
- // <hex-number> = "0_"
- // | <1-9a-f> {<0-9a-f>} "_"
- uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
- size_t Start = Position;
- uint64_t Value = 0;
- if (!isHexDigit(look()))
- Error = true;
- if (consumeIf('0')) {
- if (!consumeIf('_'))
- Error = true;
- } else {
- while (!Error && !consumeIf('_')) {
- char C = consume();
- Value *= 16;
- if (isDigit(C))
- Value += C - '0';
- else if ('a' <= C && C <= 'f')
- Value += 10 + (C - 'a');
- else
- Error = true;
- }
- }
- if (Error) {
- HexDigits = StringView();
- return 0;
- }
- size_t End = Position - 1;
- assert(Start < End);
- HexDigits = Input.substr(Start, End - Start);
- return Value;
- }
- void Demangler::print(char C) {
- if (Error || !Print)
- return;
- Output += C;
- }
- void Demangler::print(StringView S) {
- if (Error || !Print)
- return;
- Output += S;
- }
- void Demangler::printDecimalNumber(uint64_t N) {
- if (Error || !Print)
- return;
- Output << N;
- }
- // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
- // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
- // bound by one of the enclosing binders.
- void Demangler::printLifetime(uint64_t Index) {
- if (Index == 0) {
- print("'_");
- return;
- }
- if (Index - 1 >= BoundLifetimes) {
- Error = true;
- return;
- }
- uint64_t Depth = BoundLifetimes - Index;
- print('\'');
- if (Depth < 26) {
- char C = 'a' + Depth;
- print(C);
- } else {
- print('z');
- printDecimalNumber(Depth - 26 + 1);
- }
- }
- static inline bool decodePunycodeDigit(char C, size_t &Value) {
- if (isLower(C)) {
- Value = C - 'a';
- return true;
- }
- if (isDigit(C)) {
- Value = 26 + (C - '0');
- return true;
- }
- return false;
- }
- static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
- char *Buffer = Output.getBuffer();
- char *Start = Buffer + StartIdx;
- char *End = Buffer + Output.getCurrentPosition();
- Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
- }
- // Encodes code point as UTF-8 and stores results in Output. Returns false if
- // CodePoint is not a valid unicode scalar value.
- static inline bool encodeUTF8(size_t CodePoint, char *Output) {
- if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
- return false;
- if (CodePoint <= 0x7F) {
- Output[0] = CodePoint;
- return true;
- }
- if (CodePoint <= 0x7FF) {
- Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
- Output[1] = 0x80 | (CodePoint & 0x3F);
- return true;
- }
- if (CodePoint <= 0xFFFF) {
- Output[0] = 0xE0 | (CodePoint >> 12);
- Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
- Output[2] = 0x80 | (CodePoint & 0x3F);
- return true;
- }
- if (CodePoint <= 0x10FFFF) {
- Output[0] = 0xF0 | (CodePoint >> 18);
- Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
- Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
- Output[3] = 0x80 | (CodePoint & 0x3F);
- return true;
- }
- return false;
- }
- // Decodes string encoded using punycode and appends results to Output.
- // Returns true if decoding was successful.
- static bool decodePunycode(StringView Input, OutputBuffer &Output) {
- size_t OutputSize = Output.getCurrentPosition();
- size_t InputIdx = 0;
- // Rust uses an underscore as a delimiter.
- size_t DelimiterPos = StringView::npos;
- for (size_t I = 0; I != Input.size(); ++I)
- if (Input[I] == '_')
- DelimiterPos = I;
- if (DelimiterPos != StringView::npos) {
- // Copy basic code points before the last delimiter to the output.
- for (; InputIdx != DelimiterPos; ++InputIdx) {
- char C = Input[InputIdx];
- if (!isValid(C))
- return false;
- // Code points are padded with zeros while decoding is in progress.
- char UTF8[4] = {C};
- Output += StringView(UTF8, UTF8 + 4);
- }
- // Skip over the delimiter.
- ++InputIdx;
- }
- size_t Base = 36;
- size_t Skew = 38;
- size_t Bias = 72;
- size_t N = 0x80;
- size_t TMin = 1;
- size_t TMax = 26;
- size_t Damp = 700;
- auto Adapt = [&](size_t Delta, size_t NumPoints) {
- Delta /= Damp;
- Delta += Delta / NumPoints;
- Damp = 2;
- size_t K = 0;
- while (Delta > (Base - TMin) * TMax / 2) {
- Delta /= Base - TMin;
- K += Base;
- }
- return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
- };
- // Main decoding loop.
- for (size_t I = 0; InputIdx != Input.size(); I += 1) {
- size_t OldI = I;
- size_t W = 1;
- size_t Max = std::numeric_limits<size_t>::max();
- for (size_t K = Base; true; K += Base) {
- if (InputIdx == Input.size())
- return false;
- char C = Input[InputIdx++];
- size_t Digit = 0;
- if (!decodePunycodeDigit(C, Digit))
- return false;
- if (Digit > (Max - I) / W)
- return false;
- I += Digit * W;
- size_t T;
- if (K <= Bias)
- T = TMin;
- else if (K >= Bias + TMax)
- T = TMax;
- else
- T = K - Bias;
- if (Digit < T)
- break;
- if (W > Max / (Base - T))
- return false;
- W *= (Base - T);
- }
- size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
- Bias = Adapt(I - OldI, NumPoints);
- if (I / NumPoints > Max - N)
- return false;
- N += I / NumPoints;
- I = I % NumPoints;
- // Insert N at position I in the output.
- char UTF8[4] = {};
- if (!encodeUTF8(N, UTF8))
- return false;
- Output.insert(OutputSize + I * 4, UTF8, 4);
- }
- removeNullBytes(Output, OutputSize);
- return true;
- }
- void Demangler::printIdentifier(Identifier Ident) {
- if (Error || !Print)
- return;
- if (Ident.Punycode) {
- if (!decodePunycode(Ident.Name, Output))
- Error = true;
- } else {
- print(Ident.Name);
- }
- }
- char Demangler::look() const {
- if (Error || Position >= Input.size())
- return 0;
- return Input[Position];
- }
- char Demangler::consume() {
- if (Error || Position >= Input.size()) {
- Error = true;
- return 0;
- }
- return Input[Position++];
- }
- bool Demangler::consumeIf(char Prefix) {
- if (Error || Position >= Input.size() || Input[Position] != Prefix)
- return false;
- Position += 1;
- return true;
- }
- /// Computes A + B. When computation wraps around sets the error and returns
- /// false. Otherwise assigns the result to A and returns true.
- bool Demangler::addAssign(uint64_t &A, uint64_t B) {
- if (A > std::numeric_limits<uint64_t>::max() - B) {
- Error = true;
- return false;
- }
- A += B;
- return true;
- }
- /// Computes A * B. When computation wraps around sets the error and returns
- /// false. Otherwise assigns the result to A and returns true.
- bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
- if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
- Error = true;
- return false;
- }
- A *= B;
- return true;
- }
|