123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578 |
- //===--- DLangDemangle.cpp ------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- ///
- /// \file
- /// This file defines a demangler for the D programming language as specified
- /// in the ABI specification, available at:
- /// https://dlang.org/spec/abi.html#name_mangling
- ///
- //===----------------------------------------------------------------------===//
- #include "llvm/Demangle/Demangle.h"
- #include "llvm/Demangle/StringView.h"
- #include "llvm/Demangle/Utility.h"
- #include <cctype>
- #include <cstring>
- #include <limits>
- using namespace llvm;
- using llvm::itanium_demangle::OutputBuffer;
- using llvm::itanium_demangle::StringView;
- namespace {
- /// Demangle information structure.
- struct Demangler {
- /// Initialize the information structure we use to pass around information.
- ///
- /// \param Mangled String to demangle.
- Demangler(const char *Mangled);
- /// Extract and demangle the mangled symbol and append it to the output
- /// string.
- ///
- /// \param Demangled Output buffer to write the demangled name.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#name_mangling .
- /// \see https://dlang.org/spec/abi.html#MangledName .
- const char *parseMangle(OutputBuffer *Demangled);
- private:
- /// Extract and demangle a given mangled symbol and append it to the output
- /// string.
- ///
- /// \param Demangled output buffer to write the demangled name.
- /// \param Mangled mangled symbol to be demangled.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#name_mangling .
- /// \see https://dlang.org/spec/abi.html#MangledName .
- const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
- /// Extract the number from a given string.
- ///
- /// \param Mangled string to extract the number.
- /// \param Ret assigned result value.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \note A result larger than UINT_MAX is considered a failure.
- ///
- /// \see https://dlang.org/spec/abi.html#Number .
- const char *decodeNumber(const char *Mangled, unsigned long &Ret);
- /// Extract the back reference position from a given string.
- ///
- /// \param Mangled string to extract the back reference position.
- /// \param Ret assigned result value.
- ///
- /// \return the remaining string on success or nullptr on failure.
- ///
- /// \note Ret is always >= 0 on success, and unspecified on failure
- ///
- /// \see https://dlang.org/spec/abi.html#back_ref .
- /// \see https://dlang.org/spec/abi.html#NumberBackRef .
- const char *decodeBackrefPos(const char *Mangled, long &Ret);
- /// Extract the symbol pointed by the back reference form a given string.
- ///
- /// \param Mangled string to extract the back reference position.
- /// \param Ret assigned result value.
- ///
- /// \return the remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#back_ref .
- const char *decodeBackref(const char *Mangled, const char *&Ret);
- /// Extract and demangle backreferenced symbol from a given mangled symbol
- /// and append it to the output string.
- ///
- /// \param Demangled output buffer to write the demangled name.
- /// \param Mangled mangled symbol to be demangled.
- ///
- /// \return the remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#back_ref .
- /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
- const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
- /// Extract and demangle backreferenced type from a given mangled symbol
- /// and append it to the output string.
- ///
- /// \param Mangled mangled symbol to be demangled.
- ///
- /// \return the remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#back_ref .
- /// \see https://dlang.org/spec/abi.html#TypeBackRef .
- const char *parseTypeBackref(const char *Mangled);
- /// Check whether it is the beginning of a symbol name.
- ///
- /// \param Mangled string to extract the symbol name.
- ///
- /// \return true on success, false otherwise.
- ///
- /// \see https://dlang.org/spec/abi.html#SymbolName .
- bool isSymbolName(const char *Mangled);
- /// Extract and demangle an identifier from a given mangled symbol append it
- /// to the output string.
- ///
- /// \param Demangled Output buffer to write the demangled name.
- /// \param Mangled Mangled symbol to be demangled.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#SymbolName .
- const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
- /// Extract and demangle the plain identifier from a given mangled symbol and
- /// prepend/append it to the output string, with a special treatment for some
- /// magic compiler generated symbols.
- ///
- /// \param Demangled Output buffer to write the demangled name.
- /// \param Mangled Mangled symbol to be demangled.
- /// \param Len Length of the mangled symbol name.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#LName .
- const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
- unsigned long Len);
- /// Extract and demangle the qualified symbol from a given mangled symbol
- /// append it to the output string.
- ///
- /// \param Demangled Output buffer to write the demangled name.
- /// \param Mangled Mangled symbol to be demangled.
- ///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#QualifiedName .
- const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
- /// Extract and demangle a type from a given mangled symbol append it to
- /// the output string.
- ///
- /// \param Mangled mangled symbol to be demangled.
- ///
- /// \return the remaining string on success or nullptr on failure.
- ///
- /// \see https://dlang.org/spec/abi.html#Type .
- const char *parseType(const char *Mangled);
- /// The string we are demangling.
- const char *Str;
- /// The index of the last back reference.
- int LastBackref;
- };
- } // namespace
- const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
- // Return nullptr if trying to extract something that isn't a digit.
- if (Mangled == nullptr || !std::isdigit(*Mangled))
- return nullptr;
- unsigned long Val = 0;
- do {
- unsigned long Digit = Mangled[0] - '0';
- // Check for overflow.
- if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
- return nullptr;
- Val = Val * 10 + Digit;
- ++Mangled;
- } while (std::isdigit(*Mangled));
- if (*Mangled == '\0')
- return nullptr;
- Ret = Val;
- return Mangled;
- }
- const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
- // Return nullptr if trying to extract something that isn't a digit
- if (Mangled == nullptr || !std::isalpha(*Mangled))
- return nullptr;
- // Any identifier or non-basic type that has been emitted to the mangled
- // symbol before will not be emitted again, but is referenced by a special
- // sequence encoding the relative position of the original occurrence in the
- // mangled symbol name.
- // Numbers in back references are encoded with base 26 by upper case letters
- // A-Z for higher digits but lower case letters a-z for the last digit.
- // NumberBackRef:
- // [a-z]
- // [A-Z] NumberBackRef
- // ^
- unsigned long Val = 0;
- while (std::isalpha(*Mangled)) {
- // Check for overflow
- if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
- break;
- Val *= 26;
- if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
- Val += Mangled[0] - 'a';
- if ((long)Val <= 0)
- break;
- Ret = Val;
- return Mangled + 1;
- }
- Val += Mangled[0] - 'A';
- ++Mangled;
- }
- return nullptr;
- }
- const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
- assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
- Ret = nullptr;
- // Position of 'Q'
- const char *Qpos = Mangled;
- long RefPos;
- ++Mangled;
- Mangled = decodeBackrefPos(Mangled, RefPos);
- if (Mangled == nullptr)
- return nullptr;
- if (RefPos > Qpos - Str)
- return nullptr;
- // Set the position of the back reference.
- Ret = Qpos - RefPos;
- return Mangled;
- }
- const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
- const char *Mangled) {
- // An identifier back reference always points to a digit 0 to 9.
- // IdentifierBackRef:
- // Q NumberBackRef
- // ^
- const char *Backref;
- unsigned long Len;
- // Get position of the back reference
- Mangled = decodeBackref(Mangled, Backref);
- // Must point to a simple identifier
- Backref = decodeNumber(Backref, Len);
- if (Backref == nullptr || strlen(Backref) < Len)
- return nullptr;
- Backref = parseLName(Demangled, Backref, Len);
- if (Backref == nullptr)
- return nullptr;
- return Mangled;
- }
- const char *Demangler::parseTypeBackref(const char *Mangled) {
- // A type back reference always points to a letter.
- // TypeBackRef:
- // Q NumberBackRef
- // ^
- const char *Backref;
- // If we appear to be moving backwards through the mangle string, then
- // bail as this may be a recursive back reference.
- if (Mangled - Str >= LastBackref)
- return nullptr;
- int SaveRefPos = LastBackref;
- LastBackref = Mangled - Str;
- // Get position of the back reference.
- Mangled = decodeBackref(Mangled, Backref);
- // Can't decode back reference.
- if (Backref == nullptr)
- return nullptr;
- // TODO: Add support for function type back references.
- Backref = parseType(Backref);
- LastBackref = SaveRefPos;
- if (Backref == nullptr)
- return nullptr;
- return Mangled;
- }
- bool Demangler::isSymbolName(const char *Mangled) {
- long Ret;
- const char *Qref = Mangled;
- if (std::isdigit(*Mangled))
- return true;
- // TODO: Handle template instances.
- if (*Mangled != 'Q')
- return false;
- Mangled = decodeBackrefPos(Mangled + 1, Ret);
- if (Mangled == nullptr || Ret > Qref - Str)
- return false;
- return std::isdigit(Qref[-Ret]);
- }
- const char *Demangler::parseMangle(OutputBuffer *Demangled,
- const char *Mangled) {
- // A D mangled symbol is comprised of both scope and type information.
- // MangleName:
- // _D QualifiedName Type
- // _D QualifiedName Z
- // ^
- // The caller should have guaranteed that the start pointer is at the
- // above location.
- // Note that type is never a function type, but only the return type of
- // a function or the type of a variable.
- Mangled += 2;
- Mangled = parseQualified(Demangled, Mangled);
- if (Mangled != nullptr) {
- // Artificial symbols end with 'Z' and have no type.
- if (*Mangled == 'Z')
- ++Mangled;
- else {
- Mangled = parseType(Mangled);
- }
- }
- return Mangled;
- }
- const char *Demangler::parseQualified(OutputBuffer *Demangled,
- const char *Mangled) {
- // Qualified names are identifiers separated by their encoded length.
- // Nested functions also encode their argument types without specifying
- // what they return.
- // QualifiedName:
- // SymbolFunctionName
- // SymbolFunctionName QualifiedName
- // ^
- // SymbolFunctionName:
- // SymbolName
- // SymbolName TypeFunctionNoReturn
- // SymbolName M TypeFunctionNoReturn
- // SymbolName M TypeModifiers TypeFunctionNoReturn
- // The start pointer should be at the above location.
- // Whether it has more than one symbol
- size_t NotFirst = false;
- do {
- // Skip over anonymous symbols.
- if (*Mangled == '0') {
- do
- ++Mangled;
- while (*Mangled == '0');
- continue;
- }
- if (NotFirst)
- *Demangled << '.';
- NotFirst = true;
- Mangled = parseIdentifier(Demangled, Mangled);
- } while (Mangled && isSymbolName(Mangled));
- return Mangled;
- }
- const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
- const char *Mangled) {
- unsigned long Len;
- if (Mangled == nullptr || *Mangled == '\0')
- return nullptr;
- if (*Mangled == 'Q')
- return parseSymbolBackref(Demangled, Mangled);
- // TODO: Parse lengthless template instances.
- const char *Endptr = decodeNumber(Mangled, Len);
- if (Endptr == nullptr || Len == 0)
- return nullptr;
- if (strlen(Endptr) < Len)
- return nullptr;
- Mangled = Endptr;
- // TODO: Parse template instances with a length prefix.
- // There can be multiple different declarations in the same function that
- // have the same mangled name. To make the mangled names unique, a fake
- // parent in the form `__Sddd' is added to the symbol.
- if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
- const char *NumPtr = Mangled + 3;
- while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
- ++NumPtr;
- if (Mangled + Len == NumPtr) {
- // Skip over the fake parent.
- Mangled += Len;
- return parseIdentifier(Demangled, Mangled);
- }
- // Else demangle it as a plain identifier.
- }
- return parseLName(Demangled, Mangled, Len);
- }
- const char *Demangler::parseType(const char *Mangled) {
- if (*Mangled == '\0')
- return nullptr;
- switch (*Mangled) {
- // TODO: Parse type qualifiers.
- // TODO: Parse function types.
- // TODO: Parse compound types.
- // TODO: Parse delegate types.
- // TODO: Parse tuple types.
- // Basic types.
- case 'i':
- ++Mangled;
- // TODO: Add type name dumping
- return Mangled;
- // TODO: Add support for the rest of the basic types.
- // Back referenced type.
- case 'Q':
- return parseTypeBackref(Mangled);
- default: // unhandled.
- return nullptr;
- }
- }
- const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
- unsigned long Len) {
- switch (Len) {
- case 6:
- if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
- // The static initializer for a given symbol.
- Demangled->prepend("initializer for ");
- Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
- }
- if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
- // The vtable symbol for a given class.
- Demangled->prepend("vtable for ");
- Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
- }
- break;
- case 7:
- if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
- // The classinfo symbol for a given class.
- Demangled->prepend("ClassInfo for ");
- Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
- }
- break;
- case 11:
- if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
- // The interface symbol for a given class.
- Demangled->prepend("Interface for ");
- Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
- }
- break;
- case 12:
- if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
- // The ModuleInfo symbol for a given module.
- Demangled->prepend("ModuleInfo for ");
- Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
- }
- break;
- }
- *Demangled << StringView(Mangled, Len);
- Mangled += Len;
- return Mangled;
- }
- Demangler::Demangler(const char *Mangled)
- : Str(Mangled), LastBackref(strlen(Mangled)) {}
- const char *Demangler::parseMangle(OutputBuffer *Demangled) {
- return parseMangle(Demangled, this->Str);
- }
- char *llvm::dlangDemangle(const char *MangledName) {
- if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
- return nullptr;
- OutputBuffer Demangled;
- if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
- return nullptr;
- if (strcmp(MangledName, "_Dmain") == 0) {
- Demangled << "D main";
- } else {
- Demangler D = Demangler(MangledName);
- MangledName = D.parseMangle(&Demangled);
- // Check that the entire symbol was successfully demangled.
- if (MangledName == nullptr || *MangledName != '\0') {
- std::free(Demangled.getBuffer());
- return nullptr;
- }
- }
- // OutputBuffer's internal buffer is not null terminated and therefore we need
- // to add it to comply with C null terminated strings.
- if (Demangled.getCurrentPosition() > 0) {
- Demangled << '\0';
- Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
- return Demangled.getBuffer();
- }
- std::free(Demangled.getBuffer());
- return nullptr;
- }
|