RustDemangle.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262
  1. //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines a demangler for Rust v0 mangled symbols as specified in
  10. // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Demangle/Demangle.h"
  14. #include "llvm/Demangle/StringView.h"
  15. #include "llvm/Demangle/Utility.h"
  16. #include <algorithm>
  17. #include <cassert>
  18. #include <cstdint>
  19. #include <cstring>
  20. #include <limits>
  21. using namespace llvm;
  22. using llvm::itanium_demangle::OutputBuffer;
  23. using llvm::itanium_demangle::ScopedOverride;
  24. using llvm::itanium_demangle::StringView;
  25. namespace {
  26. struct Identifier {
  27. StringView Name;
  28. bool Punycode;
  29. bool empty() const { return Name.empty(); }
  30. };
  31. enum class BasicType {
  32. Bool,
  33. Char,
  34. I8,
  35. I16,
  36. I32,
  37. I64,
  38. I128,
  39. ISize,
  40. U8,
  41. U16,
  42. U32,
  43. U64,
  44. U128,
  45. USize,
  46. F32,
  47. F64,
  48. Str,
  49. Placeholder,
  50. Unit,
  51. Variadic,
  52. Never,
  53. };
  54. enum class IsInType {
  55. No,
  56. Yes,
  57. };
  58. enum class LeaveGenericsOpen {
  59. No,
  60. Yes,
  61. };
  62. class Demangler {
  63. // Maximum recursion level. Used to avoid stack overflow.
  64. size_t MaxRecursionLevel;
  65. // Current recursion level.
  66. size_t RecursionLevel;
  67. size_t BoundLifetimes;
  68. // Input string that is being demangled with "_R" prefix removed.
  69. StringView Input;
  70. // Position in the input string.
  71. size_t Position;
  72. // When true, print methods append the output to the stream.
  73. // When false, the output is suppressed.
  74. bool Print;
  75. // True if an error occurred.
  76. bool Error;
  77. public:
  78. // Demangled output.
  79. OutputBuffer Output;
  80. Demangler(size_t MaxRecursionLevel = 500);
  81. bool demangle(StringView MangledName);
  82. private:
  83. bool demanglePath(IsInType Type,
  84. LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
  85. void demangleImplPath(IsInType InType);
  86. void demangleGenericArg();
  87. void demangleType();
  88. void demangleFnSig();
  89. void demangleDynBounds();
  90. void demangleDynTrait();
  91. void demangleOptionalBinder();
  92. void demangleConst();
  93. void demangleConstInt();
  94. void demangleConstBool();
  95. void demangleConstChar();
  96. template <typename Callable> void demangleBackref(Callable Demangler) {
  97. uint64_t Backref = parseBase62Number();
  98. if (Error || Backref >= Position) {
  99. Error = true;
  100. return;
  101. }
  102. if (!Print)
  103. return;
  104. ScopedOverride<size_t> SavePosition(Position, Position);
  105. Position = Backref;
  106. Demangler();
  107. }
  108. Identifier parseIdentifier();
  109. uint64_t parseOptionalBase62Number(char Tag);
  110. uint64_t parseBase62Number();
  111. uint64_t parseDecimalNumber();
  112. uint64_t parseHexNumber(StringView &HexDigits);
  113. void print(char C);
  114. void print(StringView S);
  115. void printDecimalNumber(uint64_t N);
  116. void printBasicType(BasicType);
  117. void printLifetime(uint64_t Index);
  118. void printIdentifier(Identifier Ident);
  119. char look() const;
  120. char consume();
  121. bool consumeIf(char Prefix);
  122. bool addAssign(uint64_t &A, uint64_t B);
  123. bool mulAssign(uint64_t &A, uint64_t B);
  124. };
  125. } // namespace
  126. char *llvm::rustDemangle(const char *MangledName) {
  127. if (MangledName == nullptr)
  128. return nullptr;
  129. // Return early if mangled name doesn't look like a Rust symbol.
  130. StringView Mangled(MangledName);
  131. if (!Mangled.startsWith("_R"))
  132. return nullptr;
  133. Demangler D;
  134. if (!D.demangle(Mangled)) {
  135. std::free(D.Output.getBuffer());
  136. return nullptr;
  137. }
  138. D.Output += '\0';
  139. return D.Output.getBuffer();
  140. }
  141. Demangler::Demangler(size_t MaxRecursionLevel)
  142. : MaxRecursionLevel(MaxRecursionLevel) {}
  143. static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
  144. static inline bool isHexDigit(const char C) {
  145. return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
  146. }
  147. static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
  148. static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
  149. /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
  150. static inline bool isValid(const char C) {
  151. return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
  152. }
  153. // Demangles Rust v0 mangled symbol. Returns true when successful, and false
  154. // otherwise. The demangled symbol is stored in Output field. It is
  155. // responsibility of the caller to free the memory behind the output stream.
  156. //
  157. // <symbol-name> = "_R" <path> [<instantiating-crate>]
  158. bool Demangler::demangle(StringView Mangled) {
  159. Position = 0;
  160. Error = false;
  161. Print = true;
  162. RecursionLevel = 0;
  163. BoundLifetimes = 0;
  164. if (!Mangled.consumeFront("_R")) {
  165. Error = true;
  166. return false;
  167. }
  168. size_t Dot = Mangled.find('.');
  169. Input = Mangled.substr(0, Dot);
  170. StringView Suffix = Mangled.dropFront(Dot);
  171. demanglePath(IsInType::No);
  172. if (Position != Input.size()) {
  173. ScopedOverride<bool> SavePrint(Print, false);
  174. demanglePath(IsInType::No);
  175. }
  176. if (Position != Input.size())
  177. Error = true;
  178. if (!Suffix.empty()) {
  179. print(" (");
  180. print(Suffix);
  181. print(")");
  182. }
  183. return !Error;
  184. }
  185. // Demangles a path. InType indicates whether a path is inside a type. When
  186. // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
  187. // output. Return value indicates whether generics arguments have been left
  188. // open.
  189. //
  190. // <path> = "C" <identifier> // crate root
  191. // | "M" <impl-path> <type> // <T> (inherent impl)
  192. // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
  193. // | "Y" <type> <path> // <T as Trait> (trait definition)
  194. // | "N" <ns> <path> <identifier> // ...::ident (nested path)
  195. // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
  196. // | <backref>
  197. // <identifier> = [<disambiguator>] <undisambiguated-identifier>
  198. // <ns> = "C" // closure
  199. // | "S" // shim
  200. // | <A-Z> // other special namespaces
  201. // | <a-z> // internal namespaces
  202. bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
  203. if (Error || RecursionLevel >= MaxRecursionLevel) {
  204. Error = true;
  205. return false;
  206. }
  207. ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  208. switch (consume()) {
  209. case 'C': {
  210. parseOptionalBase62Number('s');
  211. printIdentifier(parseIdentifier());
  212. break;
  213. }
  214. case 'M': {
  215. demangleImplPath(InType);
  216. print("<");
  217. demangleType();
  218. print(">");
  219. break;
  220. }
  221. case 'X': {
  222. demangleImplPath(InType);
  223. print("<");
  224. demangleType();
  225. print(" as ");
  226. demanglePath(IsInType::Yes);
  227. print(">");
  228. break;
  229. }
  230. case 'Y': {
  231. print("<");
  232. demangleType();
  233. print(" as ");
  234. demanglePath(IsInType::Yes);
  235. print(">");
  236. break;
  237. }
  238. case 'N': {
  239. char NS = consume();
  240. if (!isLower(NS) && !isUpper(NS)) {
  241. Error = true;
  242. break;
  243. }
  244. demanglePath(InType);
  245. uint64_t Disambiguator = parseOptionalBase62Number('s');
  246. Identifier Ident = parseIdentifier();
  247. if (isUpper(NS)) {
  248. // Special namespaces
  249. print("::{");
  250. if (NS == 'C')
  251. print("closure");
  252. else if (NS == 'S')
  253. print("shim");
  254. else
  255. print(NS);
  256. if (!Ident.empty()) {
  257. print(":");
  258. printIdentifier(Ident);
  259. }
  260. print('#');
  261. printDecimalNumber(Disambiguator);
  262. print('}');
  263. } else {
  264. // Implementation internal namespaces.
  265. if (!Ident.empty()) {
  266. print("::");
  267. printIdentifier(Ident);
  268. }
  269. }
  270. break;
  271. }
  272. case 'I': {
  273. demanglePath(InType);
  274. // Omit "::" when in a type, where it is optional.
  275. if (InType == IsInType::No)
  276. print("::");
  277. print("<");
  278. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  279. if (I > 0)
  280. print(", ");
  281. demangleGenericArg();
  282. }
  283. if (LeaveOpen == LeaveGenericsOpen::Yes)
  284. return true;
  285. else
  286. print(">");
  287. break;
  288. }
  289. case 'B': {
  290. bool IsOpen = false;
  291. demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
  292. return IsOpen;
  293. }
  294. default:
  295. Error = true;
  296. break;
  297. }
  298. return false;
  299. }
  300. // <impl-path> = [<disambiguator>] <path>
  301. // <disambiguator> = "s" <base-62-number>
  302. void Demangler::demangleImplPath(IsInType InType) {
  303. ScopedOverride<bool> SavePrint(Print, false);
  304. parseOptionalBase62Number('s');
  305. demanglePath(InType);
  306. }
  307. // <generic-arg> = <lifetime>
  308. // | <type>
  309. // | "K" <const>
  310. // <lifetime> = "L" <base-62-number>
  311. void Demangler::demangleGenericArg() {
  312. if (consumeIf('L'))
  313. printLifetime(parseBase62Number());
  314. else if (consumeIf('K'))
  315. demangleConst();
  316. else
  317. demangleType();
  318. }
  319. // <basic-type> = "a" // i8
  320. // | "b" // bool
  321. // | "c" // char
  322. // | "d" // f64
  323. // | "e" // str
  324. // | "f" // f32
  325. // | "h" // u8
  326. // | "i" // isize
  327. // | "j" // usize
  328. // | "l" // i32
  329. // | "m" // u32
  330. // | "n" // i128
  331. // | "o" // u128
  332. // | "s" // i16
  333. // | "t" // u16
  334. // | "u" // ()
  335. // | "v" // ...
  336. // | "x" // i64
  337. // | "y" // u64
  338. // | "z" // !
  339. // | "p" // placeholder (e.g. for generic params), shown as _
  340. static bool parseBasicType(char C, BasicType &Type) {
  341. switch (C) {
  342. case 'a':
  343. Type = BasicType::I8;
  344. return true;
  345. case 'b':
  346. Type = BasicType::Bool;
  347. return true;
  348. case 'c':
  349. Type = BasicType::Char;
  350. return true;
  351. case 'd':
  352. Type = BasicType::F64;
  353. return true;
  354. case 'e':
  355. Type = BasicType::Str;
  356. return true;
  357. case 'f':
  358. Type = BasicType::F32;
  359. return true;
  360. case 'h':
  361. Type = BasicType::U8;
  362. return true;
  363. case 'i':
  364. Type = BasicType::ISize;
  365. return true;
  366. case 'j':
  367. Type = BasicType::USize;
  368. return true;
  369. case 'l':
  370. Type = BasicType::I32;
  371. return true;
  372. case 'm':
  373. Type = BasicType::U32;
  374. return true;
  375. case 'n':
  376. Type = BasicType::I128;
  377. return true;
  378. case 'o':
  379. Type = BasicType::U128;
  380. return true;
  381. case 'p':
  382. Type = BasicType::Placeholder;
  383. return true;
  384. case 's':
  385. Type = BasicType::I16;
  386. return true;
  387. case 't':
  388. Type = BasicType::U16;
  389. return true;
  390. case 'u':
  391. Type = BasicType::Unit;
  392. return true;
  393. case 'v':
  394. Type = BasicType::Variadic;
  395. return true;
  396. case 'x':
  397. Type = BasicType::I64;
  398. return true;
  399. case 'y':
  400. Type = BasicType::U64;
  401. return true;
  402. case 'z':
  403. Type = BasicType::Never;
  404. return true;
  405. default:
  406. return false;
  407. }
  408. }
  409. void Demangler::printBasicType(BasicType Type) {
  410. switch (Type) {
  411. case BasicType::Bool:
  412. print("bool");
  413. break;
  414. case BasicType::Char:
  415. print("char");
  416. break;
  417. case BasicType::I8:
  418. print("i8");
  419. break;
  420. case BasicType::I16:
  421. print("i16");
  422. break;
  423. case BasicType::I32:
  424. print("i32");
  425. break;
  426. case BasicType::I64:
  427. print("i64");
  428. break;
  429. case BasicType::I128:
  430. print("i128");
  431. break;
  432. case BasicType::ISize:
  433. print("isize");
  434. break;
  435. case BasicType::U8:
  436. print("u8");
  437. break;
  438. case BasicType::U16:
  439. print("u16");
  440. break;
  441. case BasicType::U32:
  442. print("u32");
  443. break;
  444. case BasicType::U64:
  445. print("u64");
  446. break;
  447. case BasicType::U128:
  448. print("u128");
  449. break;
  450. case BasicType::USize:
  451. print("usize");
  452. break;
  453. case BasicType::F32:
  454. print("f32");
  455. break;
  456. case BasicType::F64:
  457. print("f64");
  458. break;
  459. case BasicType::Str:
  460. print("str");
  461. break;
  462. case BasicType::Placeholder:
  463. print("_");
  464. break;
  465. case BasicType::Unit:
  466. print("()");
  467. break;
  468. case BasicType::Variadic:
  469. print("...");
  470. break;
  471. case BasicType::Never:
  472. print("!");
  473. break;
  474. }
  475. }
  476. // <type> = | <basic-type>
  477. // | <path> // named type
  478. // | "A" <type> <const> // [T; N]
  479. // | "S" <type> // [T]
  480. // | "T" {<type>} "E" // (T1, T2, T3, ...)
  481. // | "R" [<lifetime>] <type> // &T
  482. // | "Q" [<lifetime>] <type> // &mut T
  483. // | "P" <type> // *const T
  484. // | "O" <type> // *mut T
  485. // | "F" <fn-sig> // fn(...) -> ...
  486. // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
  487. // | <backref> // backref
  488. void Demangler::demangleType() {
  489. if (Error || RecursionLevel >= MaxRecursionLevel) {
  490. Error = true;
  491. return;
  492. }
  493. ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  494. size_t Start = Position;
  495. char C = consume();
  496. BasicType Type;
  497. if (parseBasicType(C, Type))
  498. return printBasicType(Type);
  499. switch (C) {
  500. case 'A':
  501. print("[");
  502. demangleType();
  503. print("; ");
  504. demangleConst();
  505. print("]");
  506. break;
  507. case 'S':
  508. print("[");
  509. demangleType();
  510. print("]");
  511. break;
  512. case 'T': {
  513. print("(");
  514. size_t I = 0;
  515. for (; !Error && !consumeIf('E'); ++I) {
  516. if (I > 0)
  517. print(", ");
  518. demangleType();
  519. }
  520. if (I == 1)
  521. print(",");
  522. print(")");
  523. break;
  524. }
  525. case 'R':
  526. case 'Q':
  527. print('&');
  528. if (consumeIf('L')) {
  529. if (auto Lifetime = parseBase62Number()) {
  530. printLifetime(Lifetime);
  531. print(' ');
  532. }
  533. }
  534. if (C == 'Q')
  535. print("mut ");
  536. demangleType();
  537. break;
  538. case 'P':
  539. print("*const ");
  540. demangleType();
  541. break;
  542. case 'O':
  543. print("*mut ");
  544. demangleType();
  545. break;
  546. case 'F':
  547. demangleFnSig();
  548. break;
  549. case 'D':
  550. demangleDynBounds();
  551. if (consumeIf('L')) {
  552. if (auto Lifetime = parseBase62Number()) {
  553. print(" + ");
  554. printLifetime(Lifetime);
  555. }
  556. } else {
  557. Error = true;
  558. }
  559. break;
  560. case 'B':
  561. demangleBackref([&] { demangleType(); });
  562. break;
  563. default:
  564. Position = Start;
  565. demanglePath(IsInType::Yes);
  566. break;
  567. }
  568. }
  569. // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
  570. // <abi> = "C"
  571. // | <undisambiguated-identifier>
  572. void Demangler::demangleFnSig() {
  573. ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
  574. demangleOptionalBinder();
  575. if (consumeIf('U'))
  576. print("unsafe ");
  577. if (consumeIf('K')) {
  578. print("extern \"");
  579. if (consumeIf('C')) {
  580. print("C");
  581. } else {
  582. Identifier Ident = parseIdentifier();
  583. if (Ident.Punycode)
  584. Error = true;
  585. for (char C : Ident.Name) {
  586. // When mangling ABI string, the "-" is replaced with "_".
  587. if (C == '_')
  588. C = '-';
  589. print(C);
  590. }
  591. }
  592. print("\" ");
  593. }
  594. print("fn(");
  595. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  596. if (I > 0)
  597. print(", ");
  598. demangleType();
  599. }
  600. print(")");
  601. if (consumeIf('u')) {
  602. // Skip the unit type from the output.
  603. } else {
  604. print(" -> ");
  605. demangleType();
  606. }
  607. }
  608. // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
  609. void Demangler::demangleDynBounds() {
  610. ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
  611. print("dyn ");
  612. demangleOptionalBinder();
  613. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  614. if (I > 0)
  615. print(" + ");
  616. demangleDynTrait();
  617. }
  618. }
  619. // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
  620. // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
  621. void Demangler::demangleDynTrait() {
  622. bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
  623. while (!Error && consumeIf('p')) {
  624. if (!IsOpen) {
  625. IsOpen = true;
  626. print('<');
  627. } else {
  628. print(", ");
  629. }
  630. print(parseIdentifier().Name);
  631. print(" = ");
  632. demangleType();
  633. }
  634. if (IsOpen)
  635. print(">");
  636. }
  637. // Demangles optional binder and updates the number of bound lifetimes.
  638. //
  639. // <binder> = "G" <base-62-number>
  640. void Demangler::demangleOptionalBinder() {
  641. uint64_t Binder = parseOptionalBase62Number('G');
  642. if (Error || Binder == 0)
  643. return;
  644. // In valid inputs each bound lifetime is referenced later. Referencing a
  645. // lifetime requires at least one byte of input. Reject inputs that are too
  646. // short to reference all bound lifetimes. Otherwise demangling of invalid
  647. // binders could generate excessive amounts of output.
  648. if (Binder >= Input.size() - BoundLifetimes) {
  649. Error = true;
  650. return;
  651. }
  652. print("for<");
  653. for (size_t I = 0; I != Binder; ++I) {
  654. BoundLifetimes += 1;
  655. if (I > 0)
  656. print(", ");
  657. printLifetime(1);
  658. }
  659. print("> ");
  660. }
  661. // <const> = <basic-type> <const-data>
  662. // | "p" // placeholder
  663. // | <backref>
  664. void Demangler::demangleConst() {
  665. if (Error || RecursionLevel >= MaxRecursionLevel) {
  666. Error = true;
  667. return;
  668. }
  669. ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  670. char C = consume();
  671. BasicType Type;
  672. if (parseBasicType(C, Type)) {
  673. switch (Type) {
  674. case BasicType::I8:
  675. case BasicType::I16:
  676. case BasicType::I32:
  677. case BasicType::I64:
  678. case BasicType::I128:
  679. case BasicType::ISize:
  680. case BasicType::U8:
  681. case BasicType::U16:
  682. case BasicType::U32:
  683. case BasicType::U64:
  684. case BasicType::U128:
  685. case BasicType::USize:
  686. demangleConstInt();
  687. break;
  688. case BasicType::Bool:
  689. demangleConstBool();
  690. break;
  691. case BasicType::Char:
  692. demangleConstChar();
  693. break;
  694. case BasicType::Placeholder:
  695. print('_');
  696. break;
  697. default:
  698. Error = true;
  699. break;
  700. }
  701. } else if (C == 'B') {
  702. demangleBackref([&] { demangleConst(); });
  703. } else {
  704. Error = true;
  705. }
  706. }
  707. // <const-data> = ["n"] <hex-number>
  708. void Demangler::demangleConstInt() {
  709. if (consumeIf('n'))
  710. print('-');
  711. StringView HexDigits;
  712. uint64_t Value = parseHexNumber(HexDigits);
  713. if (HexDigits.size() <= 16) {
  714. printDecimalNumber(Value);
  715. } else {
  716. print("0x");
  717. print(HexDigits);
  718. }
  719. }
  720. // <const-data> = "0_" // false
  721. // | "1_" // true
  722. void Demangler::demangleConstBool() {
  723. StringView HexDigits;
  724. parseHexNumber(HexDigits);
  725. if (HexDigits == "0")
  726. print("false");
  727. else if (HexDigits == "1")
  728. print("true");
  729. else
  730. Error = true;
  731. }
  732. /// Returns true if CodePoint represents a printable ASCII character.
  733. static bool isAsciiPrintable(uint64_t CodePoint) {
  734. return 0x20 <= CodePoint && CodePoint <= 0x7e;
  735. }
  736. // <const-data> = <hex-number>
  737. void Demangler::demangleConstChar() {
  738. StringView HexDigits;
  739. uint64_t CodePoint = parseHexNumber(HexDigits);
  740. if (Error || HexDigits.size() > 6) {
  741. Error = true;
  742. return;
  743. }
  744. print("'");
  745. switch (CodePoint) {
  746. case '\t':
  747. print(R"(\t)");
  748. break;
  749. case '\r':
  750. print(R"(\r)");
  751. break;
  752. case '\n':
  753. print(R"(\n)");
  754. break;
  755. case '\\':
  756. print(R"(\\)");
  757. break;
  758. case '"':
  759. print(R"(")");
  760. break;
  761. case '\'':
  762. print(R"(\')");
  763. break;
  764. default:
  765. if (isAsciiPrintable(CodePoint)) {
  766. char C = CodePoint;
  767. print(C);
  768. } else {
  769. print(R"(\u{)");
  770. print(HexDigits);
  771. print('}');
  772. }
  773. break;
  774. }
  775. print('\'');
  776. }
  777. // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
  778. Identifier Demangler::parseIdentifier() {
  779. bool Punycode = consumeIf('u');
  780. uint64_t Bytes = parseDecimalNumber();
  781. // Underscore resolves the ambiguity when identifier starts with a decimal
  782. // digit or another underscore.
  783. consumeIf('_');
  784. if (Error || Bytes > Input.size() - Position) {
  785. Error = true;
  786. return {};
  787. }
  788. StringView S = Input.substr(Position, Bytes);
  789. Position += Bytes;
  790. if (!std::all_of(S.begin(), S.end(), isValid)) {
  791. Error = true;
  792. return {};
  793. }
  794. return {S, Punycode};
  795. }
  796. // Parses optional base 62 number. The presence of a number is determined using
  797. // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
  798. //
  799. // This function is indended for parsing disambiguators and binders which when
  800. // not present have their value interpreted as 0, and otherwise as decoded
  801. // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
  802. // 2. When "G" is absent value is 0.
  803. uint64_t Demangler::parseOptionalBase62Number(char Tag) {
  804. if (!consumeIf(Tag))
  805. return 0;
  806. uint64_t N = parseBase62Number();
  807. if (Error || !addAssign(N, 1))
  808. return 0;
  809. return N;
  810. }
  811. // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
  812. // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
  813. // "1_" encodes 2, etc.
  814. //
  815. // <base-62-number> = {<0-9a-zA-Z>} "_"
  816. uint64_t Demangler::parseBase62Number() {
  817. if (consumeIf('_'))
  818. return 0;
  819. uint64_t Value = 0;
  820. while (true) {
  821. uint64_t Digit;
  822. char C = consume();
  823. if (C == '_') {
  824. break;
  825. } else if (isDigit(C)) {
  826. Digit = C - '0';
  827. } else if (isLower(C)) {
  828. Digit = 10 + (C - 'a');
  829. } else if (isUpper(C)) {
  830. Digit = 10 + 26 + (C - 'A');
  831. } else {
  832. Error = true;
  833. return 0;
  834. }
  835. if (!mulAssign(Value, 62))
  836. return 0;
  837. if (!addAssign(Value, Digit))
  838. return 0;
  839. }
  840. if (!addAssign(Value, 1))
  841. return 0;
  842. return Value;
  843. }
  844. // Parses a decimal number that had been encoded without any leading zeros.
  845. //
  846. // <decimal-number> = "0"
  847. // | <1-9> {<0-9>}
  848. uint64_t Demangler::parseDecimalNumber() {
  849. char C = look();
  850. if (!isDigit(C)) {
  851. Error = true;
  852. return 0;
  853. }
  854. if (C == '0') {
  855. consume();
  856. return 0;
  857. }
  858. uint64_t Value = 0;
  859. while (isDigit(look())) {
  860. if (!mulAssign(Value, 10)) {
  861. Error = true;
  862. return 0;
  863. }
  864. uint64_t D = consume() - '0';
  865. if (!addAssign(Value, D))
  866. return 0;
  867. }
  868. return Value;
  869. }
  870. // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
  871. // value and stores hex digits in HexDigits. The return value is unspecified if
  872. // HexDigits.size() > 16.
  873. //
  874. // <hex-number> = "0_"
  875. // | <1-9a-f> {<0-9a-f>} "_"
  876. uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
  877. size_t Start = Position;
  878. uint64_t Value = 0;
  879. if (!isHexDigit(look()))
  880. Error = true;
  881. if (consumeIf('0')) {
  882. if (!consumeIf('_'))
  883. Error = true;
  884. } else {
  885. while (!Error && !consumeIf('_')) {
  886. char C = consume();
  887. Value *= 16;
  888. if (isDigit(C))
  889. Value += C - '0';
  890. else if ('a' <= C && C <= 'f')
  891. Value += 10 + (C - 'a');
  892. else
  893. Error = true;
  894. }
  895. }
  896. if (Error) {
  897. HexDigits = StringView();
  898. return 0;
  899. }
  900. size_t End = Position - 1;
  901. assert(Start < End);
  902. HexDigits = Input.substr(Start, End - Start);
  903. return Value;
  904. }
  905. void Demangler::print(char C) {
  906. if (Error || !Print)
  907. return;
  908. Output += C;
  909. }
  910. void Demangler::print(StringView S) {
  911. if (Error || !Print)
  912. return;
  913. Output += S;
  914. }
  915. void Demangler::printDecimalNumber(uint64_t N) {
  916. if (Error || !Print)
  917. return;
  918. Output << N;
  919. }
  920. // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
  921. // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
  922. // bound by one of the enclosing binders.
  923. void Demangler::printLifetime(uint64_t Index) {
  924. if (Index == 0) {
  925. print("'_");
  926. return;
  927. }
  928. if (Index - 1 >= BoundLifetimes) {
  929. Error = true;
  930. return;
  931. }
  932. uint64_t Depth = BoundLifetimes - Index;
  933. print('\'');
  934. if (Depth < 26) {
  935. char C = 'a' + Depth;
  936. print(C);
  937. } else {
  938. print('z');
  939. printDecimalNumber(Depth - 26 + 1);
  940. }
  941. }
  942. static inline bool decodePunycodeDigit(char C, size_t &Value) {
  943. if (isLower(C)) {
  944. Value = C - 'a';
  945. return true;
  946. }
  947. if (isDigit(C)) {
  948. Value = 26 + (C - '0');
  949. return true;
  950. }
  951. return false;
  952. }
  953. static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
  954. char *Buffer = Output.getBuffer();
  955. char *Start = Buffer + StartIdx;
  956. char *End = Buffer + Output.getCurrentPosition();
  957. Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
  958. }
  959. // Encodes code point as UTF-8 and stores results in Output. Returns false if
  960. // CodePoint is not a valid unicode scalar value.
  961. static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  962. if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
  963. return false;
  964. if (CodePoint <= 0x7F) {
  965. Output[0] = CodePoint;
  966. return true;
  967. }
  968. if (CodePoint <= 0x7FF) {
  969. Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
  970. Output[1] = 0x80 | (CodePoint & 0x3F);
  971. return true;
  972. }
  973. if (CodePoint <= 0xFFFF) {
  974. Output[0] = 0xE0 | (CodePoint >> 12);
  975. Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
  976. Output[2] = 0x80 | (CodePoint & 0x3F);
  977. return true;
  978. }
  979. if (CodePoint <= 0x10FFFF) {
  980. Output[0] = 0xF0 | (CodePoint >> 18);
  981. Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
  982. Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
  983. Output[3] = 0x80 | (CodePoint & 0x3F);
  984. return true;
  985. }
  986. return false;
  987. }
  988. // Decodes string encoded using punycode and appends results to Output.
  989. // Returns true if decoding was successful.
  990. static bool decodePunycode(StringView Input, OutputBuffer &Output) {
  991. size_t OutputSize = Output.getCurrentPosition();
  992. size_t InputIdx = 0;
  993. // Rust uses an underscore as a delimiter.
  994. size_t DelimiterPos = StringView::npos;
  995. for (size_t I = 0; I != Input.size(); ++I)
  996. if (Input[I] == '_')
  997. DelimiterPos = I;
  998. if (DelimiterPos != StringView::npos) {
  999. // Copy basic code points before the last delimiter to the output.
  1000. for (; InputIdx != DelimiterPos; ++InputIdx) {
  1001. char C = Input[InputIdx];
  1002. if (!isValid(C))
  1003. return false;
  1004. // Code points are padded with zeros while decoding is in progress.
  1005. char UTF8[4] = {C};
  1006. Output += StringView(UTF8, UTF8 + 4);
  1007. }
  1008. // Skip over the delimiter.
  1009. ++InputIdx;
  1010. }
  1011. size_t Base = 36;
  1012. size_t Skew = 38;
  1013. size_t Bias = 72;
  1014. size_t N = 0x80;
  1015. size_t TMin = 1;
  1016. size_t TMax = 26;
  1017. size_t Damp = 700;
  1018. auto Adapt = [&](size_t Delta, size_t NumPoints) {
  1019. Delta /= Damp;
  1020. Delta += Delta / NumPoints;
  1021. Damp = 2;
  1022. size_t K = 0;
  1023. while (Delta > (Base - TMin) * TMax / 2) {
  1024. Delta /= Base - TMin;
  1025. K += Base;
  1026. }
  1027. return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
  1028. };
  1029. // Main decoding loop.
  1030. for (size_t I = 0; InputIdx != Input.size(); I += 1) {
  1031. size_t OldI = I;
  1032. size_t W = 1;
  1033. size_t Max = std::numeric_limits<size_t>::max();
  1034. for (size_t K = Base; true; K += Base) {
  1035. if (InputIdx == Input.size())
  1036. return false;
  1037. char C = Input[InputIdx++];
  1038. size_t Digit = 0;
  1039. if (!decodePunycodeDigit(C, Digit))
  1040. return false;
  1041. if (Digit > (Max - I) / W)
  1042. return false;
  1043. I += Digit * W;
  1044. size_t T;
  1045. if (K <= Bias)
  1046. T = TMin;
  1047. else if (K >= Bias + TMax)
  1048. T = TMax;
  1049. else
  1050. T = K - Bias;
  1051. if (Digit < T)
  1052. break;
  1053. if (W > Max / (Base - T))
  1054. return false;
  1055. W *= (Base - T);
  1056. }
  1057. size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
  1058. Bias = Adapt(I - OldI, NumPoints);
  1059. if (I / NumPoints > Max - N)
  1060. return false;
  1061. N += I / NumPoints;
  1062. I = I % NumPoints;
  1063. // Insert N at position I in the output.
  1064. char UTF8[4] = {};
  1065. if (!encodeUTF8(N, UTF8))
  1066. return false;
  1067. Output.insert(OutputSize + I * 4, UTF8, 4);
  1068. }
  1069. removeNullBytes(Output, OutputSize);
  1070. return true;
  1071. }
  1072. void Demangler::printIdentifier(Identifier Ident) {
  1073. if (Error || !Print)
  1074. return;
  1075. if (Ident.Punycode) {
  1076. if (!decodePunycode(Ident.Name, Output))
  1077. Error = true;
  1078. } else {
  1079. print(Ident.Name);
  1080. }
  1081. }
  1082. char Demangler::look() const {
  1083. if (Error || Position >= Input.size())
  1084. return 0;
  1085. return Input[Position];
  1086. }
  1087. char Demangler::consume() {
  1088. if (Error || Position >= Input.size()) {
  1089. Error = true;
  1090. return 0;
  1091. }
  1092. return Input[Position++];
  1093. }
  1094. bool Demangler::consumeIf(char Prefix) {
  1095. if (Error || Position >= Input.size() || Input[Position] != Prefix)
  1096. return false;
  1097. Position += 1;
  1098. return true;
  1099. }
  1100. /// Computes A + B. When computation wraps around sets the error and returns
  1101. /// false. Otherwise assigns the result to A and returns true.
  1102. bool Demangler::addAssign(uint64_t &A, uint64_t B) {
  1103. if (A > std::numeric_limits<uint64_t>::max() - B) {
  1104. Error = true;
  1105. return false;
  1106. }
  1107. A += B;
  1108. return true;
  1109. }
  1110. /// Computes A * B. When computation wraps around sets the error and returns
  1111. /// false. Otherwise assigns the result to A and returns true.
  1112. bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
  1113. if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
  1114. Error = true;
  1115. return false;
  1116. }
  1117. A *= B;
  1118. return true;
  1119. }