DLangDemangle.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. //===--- DLangDemangle.cpp ------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file defines a demangler for the D programming language as specified
  11. /// in the ABI specification, available at:
  12. /// https://dlang.org/spec/abi.html#name_mangling
  13. ///
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/Demangle/Demangle.h"
  16. #include "llvm/Demangle/StringView.h"
  17. #include "llvm/Demangle/Utility.h"
  18. #include <cctype>
  19. #include <cstring>
  20. #include <limits>
  21. using namespace llvm;
  22. using llvm::itanium_demangle::OutputBuffer;
  23. using llvm::itanium_demangle::StringView;
  24. namespace {
  25. /// Demangle information structure.
  26. struct Demangler {
  27. /// Initialize the information structure we use to pass around information.
  28. ///
  29. /// \param Mangled String to demangle.
  30. Demangler(const char *Mangled);
  31. /// Extract and demangle the mangled symbol and append it to the output
  32. /// string.
  33. ///
  34. /// \param Demangled Output buffer to write the demangled name.
  35. ///
  36. /// \return The remaining string on success or nullptr on failure.
  37. ///
  38. /// \see https://dlang.org/spec/abi.html#name_mangling .
  39. /// \see https://dlang.org/spec/abi.html#MangledName .
  40. const char *parseMangle(OutputBuffer *Demangled);
  41. private:
  42. /// Extract and demangle a given mangled symbol and append it to the output
  43. /// string.
  44. ///
  45. /// \param Demangled output buffer to write the demangled name.
  46. /// \param Mangled mangled symbol to be demangled.
  47. ///
  48. /// \return The remaining string on success or nullptr on failure.
  49. ///
  50. /// \see https://dlang.org/spec/abi.html#name_mangling .
  51. /// \see https://dlang.org/spec/abi.html#MangledName .
  52. const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
  53. /// Extract the number from a given string.
  54. ///
  55. /// \param Mangled string to extract the number.
  56. /// \param Ret assigned result value.
  57. ///
  58. /// \return The remaining string on success or nullptr on failure.
  59. ///
  60. /// \note A result larger than UINT_MAX is considered a failure.
  61. ///
  62. /// \see https://dlang.org/spec/abi.html#Number .
  63. const char *decodeNumber(const char *Mangled, unsigned long &Ret);
  64. /// Extract the back reference position from a given string.
  65. ///
  66. /// \param Mangled string to extract the back reference position.
  67. /// \param Ret assigned result value.
  68. ///
  69. /// \return the remaining string on success or nullptr on failure.
  70. ///
  71. /// \note Ret is always >= 0 on success, and unspecified on failure
  72. ///
  73. /// \see https://dlang.org/spec/abi.html#back_ref .
  74. /// \see https://dlang.org/spec/abi.html#NumberBackRef .
  75. const char *decodeBackrefPos(const char *Mangled, long &Ret);
  76. /// Extract the symbol pointed by the back reference form a given string.
  77. ///
  78. /// \param Mangled string to extract the back reference position.
  79. /// \param Ret assigned result value.
  80. ///
  81. /// \return the remaining string on success or nullptr on failure.
  82. ///
  83. /// \see https://dlang.org/spec/abi.html#back_ref .
  84. const char *decodeBackref(const char *Mangled, const char *&Ret);
  85. /// Extract and demangle backreferenced symbol from a given mangled symbol
  86. /// and append it to the output string.
  87. ///
  88. /// \param Demangled output buffer to write the demangled name.
  89. /// \param Mangled mangled symbol to be demangled.
  90. ///
  91. /// \return the remaining string on success or nullptr on failure.
  92. ///
  93. /// \see https://dlang.org/spec/abi.html#back_ref .
  94. /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
  95. const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
  96. /// Extract and demangle backreferenced type from a given mangled symbol
  97. /// and append it to the output string.
  98. ///
  99. /// \param Mangled mangled symbol to be demangled.
  100. ///
  101. /// \return the remaining string on success or nullptr on failure.
  102. ///
  103. /// \see https://dlang.org/spec/abi.html#back_ref .
  104. /// \see https://dlang.org/spec/abi.html#TypeBackRef .
  105. const char *parseTypeBackref(const char *Mangled);
  106. /// Check whether it is the beginning of a symbol name.
  107. ///
  108. /// \param Mangled string to extract the symbol name.
  109. ///
  110. /// \return true on success, false otherwise.
  111. ///
  112. /// \see https://dlang.org/spec/abi.html#SymbolName .
  113. bool isSymbolName(const char *Mangled);
  114. /// Extract and demangle an identifier from a given mangled symbol append it
  115. /// to the output string.
  116. ///
  117. /// \param Demangled Output buffer to write the demangled name.
  118. /// \param Mangled Mangled symbol to be demangled.
  119. ///
  120. /// \return The remaining string on success or nullptr on failure.
  121. ///
  122. /// \see https://dlang.org/spec/abi.html#SymbolName .
  123. const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
  124. /// Extract and demangle the plain identifier from a given mangled symbol and
  125. /// prepend/append it to the output string, with a special treatment for some
  126. /// magic compiler generated symbols.
  127. ///
  128. /// \param Demangled Output buffer to write the demangled name.
  129. /// \param Mangled Mangled symbol to be demangled.
  130. /// \param Len Length of the mangled symbol name.
  131. ///
  132. /// \return The remaining string on success or nullptr on failure.
  133. ///
  134. /// \see https://dlang.org/spec/abi.html#LName .
  135. const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
  136. unsigned long Len);
  137. /// Extract and demangle the qualified symbol from a given mangled symbol
  138. /// append it to the output string.
  139. ///
  140. /// \param Demangled Output buffer to write the demangled name.
  141. /// \param Mangled Mangled symbol to be demangled.
  142. ///
  143. /// \return The remaining string on success or nullptr on failure.
  144. ///
  145. /// \see https://dlang.org/spec/abi.html#QualifiedName .
  146. const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
  147. /// Extract and demangle a type from a given mangled symbol append it to
  148. /// the output string.
  149. ///
  150. /// \param Mangled mangled symbol to be demangled.
  151. ///
  152. /// \return the remaining string on success or nullptr on failure.
  153. ///
  154. /// \see https://dlang.org/spec/abi.html#Type .
  155. const char *parseType(const char *Mangled);
  156. /// The string we are demangling.
  157. const char *Str;
  158. /// The index of the last back reference.
  159. int LastBackref;
  160. };
  161. } // namespace
  162. const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
  163. // Return nullptr if trying to extract something that isn't a digit.
  164. if (Mangled == nullptr || !std::isdigit(*Mangled))
  165. return nullptr;
  166. unsigned long Val = 0;
  167. do {
  168. unsigned long Digit = Mangled[0] - '0';
  169. // Check for overflow.
  170. if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
  171. return nullptr;
  172. Val = Val * 10 + Digit;
  173. ++Mangled;
  174. } while (std::isdigit(*Mangled));
  175. if (*Mangled == '\0')
  176. return nullptr;
  177. Ret = Val;
  178. return Mangled;
  179. }
  180. const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
  181. // Return nullptr if trying to extract something that isn't a digit
  182. if (Mangled == nullptr || !std::isalpha(*Mangled))
  183. return nullptr;
  184. // Any identifier or non-basic type that has been emitted to the mangled
  185. // symbol before will not be emitted again, but is referenced by a special
  186. // sequence encoding the relative position of the original occurrence in the
  187. // mangled symbol name.
  188. // Numbers in back references are encoded with base 26 by upper case letters
  189. // A-Z for higher digits but lower case letters a-z for the last digit.
  190. // NumberBackRef:
  191. // [a-z]
  192. // [A-Z] NumberBackRef
  193. // ^
  194. unsigned long Val = 0;
  195. while (std::isalpha(*Mangled)) {
  196. // Check for overflow
  197. if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
  198. break;
  199. Val *= 26;
  200. if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
  201. Val += Mangled[0] - 'a';
  202. if ((long)Val <= 0)
  203. break;
  204. Ret = Val;
  205. return Mangled + 1;
  206. }
  207. Val += Mangled[0] - 'A';
  208. ++Mangled;
  209. }
  210. return nullptr;
  211. }
  212. const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
  213. assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
  214. Ret = nullptr;
  215. // Position of 'Q'
  216. const char *Qpos = Mangled;
  217. long RefPos;
  218. ++Mangled;
  219. Mangled = decodeBackrefPos(Mangled, RefPos);
  220. if (Mangled == nullptr)
  221. return nullptr;
  222. if (RefPos > Qpos - Str)
  223. return nullptr;
  224. // Set the position of the back reference.
  225. Ret = Qpos - RefPos;
  226. return Mangled;
  227. }
  228. const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
  229. const char *Mangled) {
  230. // An identifier back reference always points to a digit 0 to 9.
  231. // IdentifierBackRef:
  232. // Q NumberBackRef
  233. // ^
  234. const char *Backref;
  235. unsigned long Len;
  236. // Get position of the back reference
  237. Mangled = decodeBackref(Mangled, Backref);
  238. // Must point to a simple identifier
  239. Backref = decodeNumber(Backref, Len);
  240. if (Backref == nullptr || strlen(Backref) < Len)
  241. return nullptr;
  242. Backref = parseLName(Demangled, Backref, Len);
  243. if (Backref == nullptr)
  244. return nullptr;
  245. return Mangled;
  246. }
  247. const char *Demangler::parseTypeBackref(const char *Mangled) {
  248. // A type back reference always points to a letter.
  249. // TypeBackRef:
  250. // Q NumberBackRef
  251. // ^
  252. const char *Backref;
  253. // If we appear to be moving backwards through the mangle string, then
  254. // bail as this may be a recursive back reference.
  255. if (Mangled - Str >= LastBackref)
  256. return nullptr;
  257. int SaveRefPos = LastBackref;
  258. LastBackref = Mangled - Str;
  259. // Get position of the back reference.
  260. Mangled = decodeBackref(Mangled, Backref);
  261. // Can't decode back reference.
  262. if (Backref == nullptr)
  263. return nullptr;
  264. // TODO: Add support for function type back references.
  265. Backref = parseType(Backref);
  266. LastBackref = SaveRefPos;
  267. if (Backref == nullptr)
  268. return nullptr;
  269. return Mangled;
  270. }
  271. bool Demangler::isSymbolName(const char *Mangled) {
  272. long Ret;
  273. const char *Qref = Mangled;
  274. if (std::isdigit(*Mangled))
  275. return true;
  276. // TODO: Handle template instances.
  277. if (*Mangled != 'Q')
  278. return false;
  279. Mangled = decodeBackrefPos(Mangled + 1, Ret);
  280. if (Mangled == nullptr || Ret > Qref - Str)
  281. return false;
  282. return std::isdigit(Qref[-Ret]);
  283. }
  284. const char *Demangler::parseMangle(OutputBuffer *Demangled,
  285. const char *Mangled) {
  286. // A D mangled symbol is comprised of both scope and type information.
  287. // MangleName:
  288. // _D QualifiedName Type
  289. // _D QualifiedName Z
  290. // ^
  291. // The caller should have guaranteed that the start pointer is at the
  292. // above location.
  293. // Note that type is never a function type, but only the return type of
  294. // a function or the type of a variable.
  295. Mangled += 2;
  296. Mangled = parseQualified(Demangled, Mangled);
  297. if (Mangled != nullptr) {
  298. // Artificial symbols end with 'Z' and have no type.
  299. if (*Mangled == 'Z')
  300. ++Mangled;
  301. else {
  302. Mangled = parseType(Mangled);
  303. }
  304. }
  305. return Mangled;
  306. }
  307. const char *Demangler::parseQualified(OutputBuffer *Demangled,
  308. const char *Mangled) {
  309. // Qualified names are identifiers separated by their encoded length.
  310. // Nested functions also encode their argument types without specifying
  311. // what they return.
  312. // QualifiedName:
  313. // SymbolFunctionName
  314. // SymbolFunctionName QualifiedName
  315. // ^
  316. // SymbolFunctionName:
  317. // SymbolName
  318. // SymbolName TypeFunctionNoReturn
  319. // SymbolName M TypeFunctionNoReturn
  320. // SymbolName M TypeModifiers TypeFunctionNoReturn
  321. // The start pointer should be at the above location.
  322. // Whether it has more than one symbol
  323. size_t NotFirst = false;
  324. do {
  325. // Skip over anonymous symbols.
  326. if (*Mangled == '0') {
  327. do
  328. ++Mangled;
  329. while (*Mangled == '0');
  330. continue;
  331. }
  332. if (NotFirst)
  333. *Demangled << '.';
  334. NotFirst = true;
  335. Mangled = parseIdentifier(Demangled, Mangled);
  336. } while (Mangled && isSymbolName(Mangled));
  337. return Mangled;
  338. }
  339. const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
  340. const char *Mangled) {
  341. unsigned long Len;
  342. if (Mangled == nullptr || *Mangled == '\0')
  343. return nullptr;
  344. if (*Mangled == 'Q')
  345. return parseSymbolBackref(Demangled, Mangled);
  346. // TODO: Parse lengthless template instances.
  347. const char *Endptr = decodeNumber(Mangled, Len);
  348. if (Endptr == nullptr || Len == 0)
  349. return nullptr;
  350. if (strlen(Endptr) < Len)
  351. return nullptr;
  352. Mangled = Endptr;
  353. // TODO: Parse template instances with a length prefix.
  354. // There can be multiple different declarations in the same function that
  355. // have the same mangled name. To make the mangled names unique, a fake
  356. // parent in the form `__Sddd' is added to the symbol.
  357. if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
  358. const char *NumPtr = Mangled + 3;
  359. while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
  360. ++NumPtr;
  361. if (Mangled + Len == NumPtr) {
  362. // Skip over the fake parent.
  363. Mangled += Len;
  364. return parseIdentifier(Demangled, Mangled);
  365. }
  366. // Else demangle it as a plain identifier.
  367. }
  368. return parseLName(Demangled, Mangled, Len);
  369. }
  370. const char *Demangler::parseType(const char *Mangled) {
  371. if (*Mangled == '\0')
  372. return nullptr;
  373. switch (*Mangled) {
  374. // TODO: Parse type qualifiers.
  375. // TODO: Parse function types.
  376. // TODO: Parse compound types.
  377. // TODO: Parse delegate types.
  378. // TODO: Parse tuple types.
  379. // Basic types.
  380. case 'i':
  381. ++Mangled;
  382. // TODO: Add type name dumping
  383. return Mangled;
  384. // TODO: Add support for the rest of the basic types.
  385. // Back referenced type.
  386. case 'Q':
  387. return parseTypeBackref(Mangled);
  388. default: // unhandled.
  389. return nullptr;
  390. }
  391. }
  392. const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
  393. unsigned long Len) {
  394. switch (Len) {
  395. case 6:
  396. if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
  397. // The static initializer for a given symbol.
  398. Demangled->prepend("initializer for ");
  399. Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
  400. Mangled += Len;
  401. return Mangled;
  402. }
  403. if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
  404. // The vtable symbol for a given class.
  405. Demangled->prepend("vtable for ");
  406. Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
  407. Mangled += Len;
  408. return Mangled;
  409. }
  410. break;
  411. case 7:
  412. if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
  413. // The classinfo symbol for a given class.
  414. Demangled->prepend("ClassInfo for ");
  415. Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
  416. Mangled += Len;
  417. return Mangled;
  418. }
  419. break;
  420. case 11:
  421. if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
  422. // The interface symbol for a given class.
  423. Demangled->prepend("Interface for ");
  424. Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
  425. Mangled += Len;
  426. return Mangled;
  427. }
  428. break;
  429. case 12:
  430. if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
  431. // The ModuleInfo symbol for a given module.
  432. Demangled->prepend("ModuleInfo for ");
  433. Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
  434. Mangled += Len;
  435. return Mangled;
  436. }
  437. break;
  438. }
  439. *Demangled << StringView(Mangled, Len);
  440. Mangled += Len;
  441. return Mangled;
  442. }
  443. Demangler::Demangler(const char *Mangled)
  444. : Str(Mangled), LastBackref(strlen(Mangled)) {}
  445. const char *Demangler::parseMangle(OutputBuffer *Demangled) {
  446. return parseMangle(Demangled, this->Str);
  447. }
  448. char *llvm::dlangDemangle(const char *MangledName) {
  449. if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
  450. return nullptr;
  451. OutputBuffer Demangled;
  452. if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
  453. return nullptr;
  454. if (strcmp(MangledName, "_Dmain") == 0) {
  455. Demangled << "D main";
  456. } else {
  457. Demangler D = Demangler(MangledName);
  458. MangledName = D.parseMangle(&Demangled);
  459. // Check that the entire symbol was successfully demangled.
  460. if (MangledName == nullptr || *MangledName != '\0') {
  461. std::free(Demangled.getBuffer());
  462. return nullptr;
  463. }
  464. }
  465. // OutputBuffer's internal buffer is not null terminated and therefore we need
  466. // to add it to comply with C null terminated strings.
  467. if (Demangled.getCurrentPosition() > 0) {
  468. Demangled << '\0';
  469. Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
  470. return Demangled.getBuffer();
  471. }
  472. std::free(Demangled.getBuffer());
  473. return nullptr;
  474. }