RustDemangle.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295
  1. //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines a demangler for Rust v0 mangled symbols as specified in
  10. // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Demangle/Demangle.h"
  14. #include "llvm/Demangle/StringView.h"
  15. #include "llvm/Demangle/Utility.h"
  16. #include <algorithm>
  17. #include <cassert>
  18. #include <cstdint>
  19. #include <cstring>
  20. #include <limits>
  21. using namespace llvm;
  22. using llvm::itanium_demangle::OutputBuffer;
  23. using llvm::itanium_demangle::StringView;
  24. using llvm::itanium_demangle::SwapAndRestore;
  25. namespace {
  26. struct Identifier {
  27. StringView Name;
  28. bool Punycode;
  29. bool empty() const { return Name.empty(); }
  30. };
  31. enum class BasicType {
  32. Bool,
  33. Char,
  34. I8,
  35. I16,
  36. I32,
  37. I64,
  38. I128,
  39. ISize,
  40. U8,
  41. U16,
  42. U32,
  43. U64,
  44. U128,
  45. USize,
  46. F32,
  47. F64,
  48. Str,
  49. Placeholder,
  50. Unit,
  51. Variadic,
  52. Never,
  53. };
  54. enum class IsInType {
  55. No,
  56. Yes,
  57. };
  58. enum class LeaveGenericsOpen {
  59. No,
  60. Yes,
  61. };
  62. class Demangler {
  63. // Maximum recursion level. Used to avoid stack overflow.
  64. size_t MaxRecursionLevel;
  65. // Current recursion level.
  66. size_t RecursionLevel;
  67. size_t BoundLifetimes;
  68. // Input string that is being demangled with "_R" prefix removed.
  69. StringView Input;
  70. // Position in the input string.
  71. size_t Position;
  72. // When true, print methods append the output to the stream.
  73. // When false, the output is suppressed.
  74. bool Print;
  75. // True if an error occurred.
  76. bool Error;
  77. public:
  78. // Demangled output.
  79. OutputBuffer Output;
  80. Demangler(size_t MaxRecursionLevel = 500);
  81. bool demangle(StringView MangledName);
  82. private:
  83. bool demanglePath(IsInType Type,
  84. LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
  85. void demangleImplPath(IsInType InType);
  86. void demangleGenericArg();
  87. void demangleType();
  88. void demangleFnSig();
  89. void demangleDynBounds();
  90. void demangleDynTrait();
  91. void demangleOptionalBinder();
  92. void demangleConst();
  93. void demangleConstInt();
  94. void demangleConstBool();
  95. void demangleConstChar();
  96. template <typename Callable> void demangleBackref(Callable Demangler) {
  97. uint64_t Backref = parseBase62Number();
  98. if (Error || Backref >= Position) {
  99. Error = true;
  100. return;
  101. }
  102. if (!Print)
  103. return;
  104. SwapAndRestore<size_t> SavePosition(Position, Position);
  105. Position = Backref;
  106. Demangler();
  107. }
  108. Identifier parseIdentifier();
  109. uint64_t parseOptionalBase62Number(char Tag);
  110. uint64_t parseBase62Number();
  111. uint64_t parseDecimalNumber();
  112. uint64_t parseHexNumber(StringView &HexDigits);
  113. void print(char C);
  114. void print(StringView S);
  115. void printDecimalNumber(uint64_t N);
  116. void printBasicType(BasicType);
  117. void printLifetime(uint64_t Index);
  118. void printIdentifier(Identifier Ident);
  119. char look() const;
  120. char consume();
  121. bool consumeIf(char Prefix);
  122. bool addAssign(uint64_t &A, uint64_t B);
  123. bool mulAssign(uint64_t &A, uint64_t B);
  124. };
  125. } // namespace
  126. char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
  127. int *Status) {
  128. if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
  129. if (Status != nullptr)
  130. *Status = demangle_invalid_args;
  131. return nullptr;
  132. }
  133. // Return early if mangled name doesn't look like a Rust symbol.
  134. StringView Mangled(MangledName);
  135. if (!Mangled.startsWith("_R")) {
  136. if (Status != nullptr)
  137. *Status = demangle_invalid_mangled_name;
  138. return nullptr;
  139. }
  140. Demangler D;
  141. if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
  142. if (Status != nullptr)
  143. *Status = demangle_memory_alloc_failure;
  144. return nullptr;
  145. }
  146. if (!D.demangle(Mangled)) {
  147. if (Status != nullptr)
  148. *Status = demangle_invalid_mangled_name;
  149. std::free(D.Output.getBuffer());
  150. return nullptr;
  151. }
  152. D.Output += '\0';
  153. char *Demangled = D.Output.getBuffer();
  154. size_t DemangledLen = D.Output.getCurrentPosition();
  155. if (Buf != nullptr) {
  156. if (DemangledLen <= *N) {
  157. std::memcpy(Buf, Demangled, DemangledLen);
  158. std::free(Demangled);
  159. Demangled = Buf;
  160. } else {
  161. std::free(Buf);
  162. }
  163. }
  164. if (N != nullptr)
  165. *N = DemangledLen;
  166. if (Status != nullptr)
  167. *Status = demangle_success;
  168. return Demangled;
  169. }
  170. Demangler::Demangler(size_t MaxRecursionLevel)
  171. : MaxRecursionLevel(MaxRecursionLevel) {}
  172. static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
  173. static inline bool isHexDigit(const char C) {
  174. return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
  175. }
  176. static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
  177. static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
  178. /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
  179. static inline bool isValid(const char C) {
  180. return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
  181. }
  182. // Demangles Rust v0 mangled symbol. Returns true when successful, and false
  183. // otherwise. The demangled symbol is stored in Output field. It is
  184. // responsibility of the caller to free the memory behind the output stream.
  185. //
  186. // <symbol-name> = "_R" <path> [<instantiating-crate>]
  187. bool Demangler::demangle(StringView Mangled) {
  188. Position = 0;
  189. Error = false;
  190. Print = true;
  191. RecursionLevel = 0;
  192. BoundLifetimes = 0;
  193. if (!Mangled.consumeFront("_R")) {
  194. Error = true;
  195. return false;
  196. }
  197. size_t Dot = Mangled.find('.');
  198. Input = Mangled.substr(0, Dot);
  199. StringView Suffix = Mangled.dropFront(Dot);
  200. demanglePath(IsInType::No);
  201. if (Position != Input.size()) {
  202. SwapAndRestore<bool> SavePrint(Print, false);
  203. demanglePath(IsInType::No);
  204. }
  205. if (Position != Input.size())
  206. Error = true;
  207. if (!Suffix.empty()) {
  208. print(" (");
  209. print(Suffix);
  210. print(")");
  211. }
  212. return !Error;
  213. }
  214. // Demangles a path. InType indicates whether a path is inside a type. When
  215. // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
  216. // output. Return value indicates whether generics arguments have been left
  217. // open.
  218. //
  219. // <path> = "C" <identifier> // crate root
  220. // | "M" <impl-path> <type> // <T> (inherent impl)
  221. // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
  222. // | "Y" <type> <path> // <T as Trait> (trait definition)
  223. // | "N" <ns> <path> <identifier> // ...::ident (nested path)
  224. // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
  225. // | <backref>
  226. // <identifier> = [<disambiguator>] <undisambiguated-identifier>
  227. // <ns> = "C" // closure
  228. // | "S" // shim
  229. // | <A-Z> // other special namespaces
  230. // | <a-z> // internal namespaces
  231. bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
  232. if (Error || RecursionLevel >= MaxRecursionLevel) {
  233. Error = true;
  234. return false;
  235. }
  236. SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  237. switch (consume()) {
  238. case 'C': {
  239. parseOptionalBase62Number('s');
  240. printIdentifier(parseIdentifier());
  241. break;
  242. }
  243. case 'M': {
  244. demangleImplPath(InType);
  245. print("<");
  246. demangleType();
  247. print(">");
  248. break;
  249. }
  250. case 'X': {
  251. demangleImplPath(InType);
  252. print("<");
  253. demangleType();
  254. print(" as ");
  255. demanglePath(IsInType::Yes);
  256. print(">");
  257. break;
  258. }
  259. case 'Y': {
  260. print("<");
  261. demangleType();
  262. print(" as ");
  263. demanglePath(IsInType::Yes);
  264. print(">");
  265. break;
  266. }
  267. case 'N': {
  268. char NS = consume();
  269. if (!isLower(NS) && !isUpper(NS)) {
  270. Error = true;
  271. break;
  272. }
  273. demanglePath(InType);
  274. uint64_t Disambiguator = parseOptionalBase62Number('s');
  275. Identifier Ident = parseIdentifier();
  276. if (isUpper(NS)) {
  277. // Special namespaces
  278. print("::{");
  279. if (NS == 'C')
  280. print("closure");
  281. else if (NS == 'S')
  282. print("shim");
  283. else
  284. print(NS);
  285. if (!Ident.empty()) {
  286. print(":");
  287. printIdentifier(Ident);
  288. }
  289. print('#');
  290. printDecimalNumber(Disambiguator);
  291. print('}');
  292. } else {
  293. // Implementation internal namespaces.
  294. if (!Ident.empty()) {
  295. print("::");
  296. printIdentifier(Ident);
  297. }
  298. }
  299. break;
  300. }
  301. case 'I': {
  302. demanglePath(InType);
  303. // Omit "::" when in a type, where it is optional.
  304. if (InType == IsInType::No)
  305. print("::");
  306. print("<");
  307. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  308. if (I > 0)
  309. print(", ");
  310. demangleGenericArg();
  311. }
  312. if (LeaveOpen == LeaveGenericsOpen::Yes)
  313. return true;
  314. else
  315. print(">");
  316. break;
  317. }
  318. case 'B': {
  319. bool IsOpen = false;
  320. demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
  321. return IsOpen;
  322. }
  323. default:
  324. Error = true;
  325. break;
  326. }
  327. return false;
  328. }
  329. // <impl-path> = [<disambiguator>] <path>
  330. // <disambiguator> = "s" <base-62-number>
  331. void Demangler::demangleImplPath(IsInType InType) {
  332. SwapAndRestore<bool> SavePrint(Print, false);
  333. parseOptionalBase62Number('s');
  334. demanglePath(InType);
  335. }
  336. // <generic-arg> = <lifetime>
  337. // | <type>
  338. // | "K" <const>
  339. // <lifetime> = "L" <base-62-number>
  340. void Demangler::demangleGenericArg() {
  341. if (consumeIf('L'))
  342. printLifetime(parseBase62Number());
  343. else if (consumeIf('K'))
  344. demangleConst();
  345. else
  346. demangleType();
  347. }
  348. // <basic-type> = "a" // i8
  349. // | "b" // bool
  350. // | "c" // char
  351. // | "d" // f64
  352. // | "e" // str
  353. // | "f" // f32
  354. // | "h" // u8
  355. // | "i" // isize
  356. // | "j" // usize
  357. // | "l" // i32
  358. // | "m" // u32
  359. // | "n" // i128
  360. // | "o" // u128
  361. // | "s" // i16
  362. // | "t" // u16
  363. // | "u" // ()
  364. // | "v" // ...
  365. // | "x" // i64
  366. // | "y" // u64
  367. // | "z" // !
  368. // | "p" // placeholder (e.g. for generic params), shown as _
  369. static bool parseBasicType(char C, BasicType &Type) {
  370. switch (C) {
  371. case 'a':
  372. Type = BasicType::I8;
  373. return true;
  374. case 'b':
  375. Type = BasicType::Bool;
  376. return true;
  377. case 'c':
  378. Type = BasicType::Char;
  379. return true;
  380. case 'd':
  381. Type = BasicType::F64;
  382. return true;
  383. case 'e':
  384. Type = BasicType::Str;
  385. return true;
  386. case 'f':
  387. Type = BasicType::F32;
  388. return true;
  389. case 'h':
  390. Type = BasicType::U8;
  391. return true;
  392. case 'i':
  393. Type = BasicType::ISize;
  394. return true;
  395. case 'j':
  396. Type = BasicType::USize;
  397. return true;
  398. case 'l':
  399. Type = BasicType::I32;
  400. return true;
  401. case 'm':
  402. Type = BasicType::U32;
  403. return true;
  404. case 'n':
  405. Type = BasicType::I128;
  406. return true;
  407. case 'o':
  408. Type = BasicType::U128;
  409. return true;
  410. case 'p':
  411. Type = BasicType::Placeholder;
  412. return true;
  413. case 's':
  414. Type = BasicType::I16;
  415. return true;
  416. case 't':
  417. Type = BasicType::U16;
  418. return true;
  419. case 'u':
  420. Type = BasicType::Unit;
  421. return true;
  422. case 'v':
  423. Type = BasicType::Variadic;
  424. return true;
  425. case 'x':
  426. Type = BasicType::I64;
  427. return true;
  428. case 'y':
  429. Type = BasicType::U64;
  430. return true;
  431. case 'z':
  432. Type = BasicType::Never;
  433. return true;
  434. default:
  435. return false;
  436. }
  437. }
  438. void Demangler::printBasicType(BasicType Type) {
  439. switch (Type) {
  440. case BasicType::Bool:
  441. print("bool");
  442. break;
  443. case BasicType::Char:
  444. print("char");
  445. break;
  446. case BasicType::I8:
  447. print("i8");
  448. break;
  449. case BasicType::I16:
  450. print("i16");
  451. break;
  452. case BasicType::I32:
  453. print("i32");
  454. break;
  455. case BasicType::I64:
  456. print("i64");
  457. break;
  458. case BasicType::I128:
  459. print("i128");
  460. break;
  461. case BasicType::ISize:
  462. print("isize");
  463. break;
  464. case BasicType::U8:
  465. print("u8");
  466. break;
  467. case BasicType::U16:
  468. print("u16");
  469. break;
  470. case BasicType::U32:
  471. print("u32");
  472. break;
  473. case BasicType::U64:
  474. print("u64");
  475. break;
  476. case BasicType::U128:
  477. print("u128");
  478. break;
  479. case BasicType::USize:
  480. print("usize");
  481. break;
  482. case BasicType::F32:
  483. print("f32");
  484. break;
  485. case BasicType::F64:
  486. print("f64");
  487. break;
  488. case BasicType::Str:
  489. print("str");
  490. break;
  491. case BasicType::Placeholder:
  492. print("_");
  493. break;
  494. case BasicType::Unit:
  495. print("()");
  496. break;
  497. case BasicType::Variadic:
  498. print("...");
  499. break;
  500. case BasicType::Never:
  501. print("!");
  502. break;
  503. }
  504. }
  505. // <type> = | <basic-type>
  506. // | <path> // named type
  507. // | "A" <type> <const> // [T; N]
  508. // | "S" <type> // [T]
  509. // | "T" {<type>} "E" // (T1, T2, T3, ...)
  510. // | "R" [<lifetime>] <type> // &T
  511. // | "Q" [<lifetime>] <type> // &mut T
  512. // | "P" <type> // *const T
  513. // | "O" <type> // *mut T
  514. // | "F" <fn-sig> // fn(...) -> ...
  515. // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
  516. // | <backref> // backref
  517. void Demangler::demangleType() {
  518. if (Error || RecursionLevel >= MaxRecursionLevel) {
  519. Error = true;
  520. return;
  521. }
  522. SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  523. size_t Start = Position;
  524. char C = consume();
  525. BasicType Type;
  526. if (parseBasicType(C, Type))
  527. return printBasicType(Type);
  528. switch (C) {
  529. case 'A':
  530. print("[");
  531. demangleType();
  532. print("; ");
  533. demangleConst();
  534. print("]");
  535. break;
  536. case 'S':
  537. print("[");
  538. demangleType();
  539. print("]");
  540. break;
  541. case 'T': {
  542. print("(");
  543. size_t I = 0;
  544. for (; !Error && !consumeIf('E'); ++I) {
  545. if (I > 0)
  546. print(", ");
  547. demangleType();
  548. }
  549. if (I == 1)
  550. print(",");
  551. print(")");
  552. break;
  553. }
  554. case 'R':
  555. case 'Q':
  556. print('&');
  557. if (consumeIf('L')) {
  558. if (auto Lifetime = parseBase62Number()) {
  559. printLifetime(Lifetime);
  560. print(' ');
  561. }
  562. }
  563. if (C == 'Q')
  564. print("mut ");
  565. demangleType();
  566. break;
  567. case 'P':
  568. print("*const ");
  569. demangleType();
  570. break;
  571. case 'O':
  572. print("*mut ");
  573. demangleType();
  574. break;
  575. case 'F':
  576. demangleFnSig();
  577. break;
  578. case 'D':
  579. demangleDynBounds();
  580. if (consumeIf('L')) {
  581. if (auto Lifetime = parseBase62Number()) {
  582. print(" + ");
  583. printLifetime(Lifetime);
  584. }
  585. } else {
  586. Error = true;
  587. }
  588. break;
  589. case 'B':
  590. demangleBackref([&] { demangleType(); });
  591. break;
  592. default:
  593. Position = Start;
  594. demanglePath(IsInType::Yes);
  595. break;
  596. }
  597. }
  598. // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
  599. // <abi> = "C"
  600. // | <undisambiguated-identifier>
  601. void Demangler::demangleFnSig() {
  602. SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
  603. demangleOptionalBinder();
  604. if (consumeIf('U'))
  605. print("unsafe ");
  606. if (consumeIf('K')) {
  607. print("extern \"");
  608. if (consumeIf('C')) {
  609. print("C");
  610. } else {
  611. Identifier Ident = parseIdentifier();
  612. if (Ident.Punycode)
  613. Error = true;
  614. for (char C : Ident.Name) {
  615. // When mangling ABI string, the "-" is replaced with "_".
  616. if (C == '_')
  617. C = '-';
  618. print(C);
  619. }
  620. }
  621. print("\" ");
  622. }
  623. print("fn(");
  624. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  625. if (I > 0)
  626. print(", ");
  627. demangleType();
  628. }
  629. print(")");
  630. if (consumeIf('u')) {
  631. // Skip the unit type from the output.
  632. } else {
  633. print(" -> ");
  634. demangleType();
  635. }
  636. }
  637. // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
  638. void Demangler::demangleDynBounds() {
  639. SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
  640. print("dyn ");
  641. demangleOptionalBinder();
  642. for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
  643. if (I > 0)
  644. print(" + ");
  645. demangleDynTrait();
  646. }
  647. }
  648. // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
  649. // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
  650. void Demangler::demangleDynTrait() {
  651. bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
  652. while (!Error && consumeIf('p')) {
  653. if (!IsOpen) {
  654. IsOpen = true;
  655. print('<');
  656. } else {
  657. print(", ");
  658. }
  659. print(parseIdentifier().Name);
  660. print(" = ");
  661. demangleType();
  662. }
  663. if (IsOpen)
  664. print(">");
  665. }
  666. // Demangles optional binder and updates the number of bound lifetimes.
  667. //
  668. // <binder> = "G" <base-62-number>
  669. void Demangler::demangleOptionalBinder() {
  670. uint64_t Binder = parseOptionalBase62Number('G');
  671. if (Error || Binder == 0)
  672. return;
  673. // In valid inputs each bound lifetime is referenced later. Referencing a
  674. // lifetime requires at least one byte of input. Reject inputs that are too
  675. // short to reference all bound lifetimes. Otherwise demangling of invalid
  676. // binders could generate excessive amounts of output.
  677. if (Binder >= Input.size() - BoundLifetimes) {
  678. Error = true;
  679. return;
  680. }
  681. print("for<");
  682. for (size_t I = 0; I != Binder; ++I) {
  683. BoundLifetimes += 1;
  684. if (I > 0)
  685. print(", ");
  686. printLifetime(1);
  687. }
  688. print("> ");
  689. }
  690. // <const> = <basic-type> <const-data>
  691. // | "p" // placeholder
  692. // | <backref>
  693. void Demangler::demangleConst() {
  694. if (Error || RecursionLevel >= MaxRecursionLevel) {
  695. Error = true;
  696. return;
  697. }
  698. SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
  699. char C = consume();
  700. BasicType Type;
  701. if (parseBasicType(C, Type)) {
  702. switch (Type) {
  703. case BasicType::I8:
  704. case BasicType::I16:
  705. case BasicType::I32:
  706. case BasicType::I64:
  707. case BasicType::I128:
  708. case BasicType::ISize:
  709. case BasicType::U8:
  710. case BasicType::U16:
  711. case BasicType::U32:
  712. case BasicType::U64:
  713. case BasicType::U128:
  714. case BasicType::USize:
  715. demangleConstInt();
  716. break;
  717. case BasicType::Bool:
  718. demangleConstBool();
  719. break;
  720. case BasicType::Char:
  721. demangleConstChar();
  722. break;
  723. case BasicType::Placeholder:
  724. print('_');
  725. break;
  726. default:
  727. Error = true;
  728. break;
  729. }
  730. } else if (C == 'B') {
  731. demangleBackref([&] { demangleConst(); });
  732. } else {
  733. Error = true;
  734. }
  735. }
  736. // <const-data> = ["n"] <hex-number>
  737. void Demangler::demangleConstInt() {
  738. if (consumeIf('n'))
  739. print('-');
  740. StringView HexDigits;
  741. uint64_t Value = parseHexNumber(HexDigits);
  742. if (HexDigits.size() <= 16) {
  743. printDecimalNumber(Value);
  744. } else {
  745. print("0x");
  746. print(HexDigits);
  747. }
  748. }
  749. // <const-data> = "0_" // false
  750. // | "1_" // true
  751. void Demangler::demangleConstBool() {
  752. StringView HexDigits;
  753. parseHexNumber(HexDigits);
  754. if (HexDigits == "0")
  755. print("false");
  756. else if (HexDigits == "1")
  757. print("true");
  758. else
  759. Error = true;
  760. }
  761. /// Returns true if CodePoint represents a printable ASCII character.
  762. static bool isAsciiPrintable(uint64_t CodePoint) {
  763. return 0x20 <= CodePoint && CodePoint <= 0x7e;
  764. }
  765. // <const-data> = <hex-number>
  766. void Demangler::demangleConstChar() {
  767. StringView HexDigits;
  768. uint64_t CodePoint = parseHexNumber(HexDigits);
  769. if (Error || HexDigits.size() > 6) {
  770. Error = true;
  771. return;
  772. }
  773. print("'");
  774. switch (CodePoint) {
  775. case '\t':
  776. print(R"(\t)");
  777. break;
  778. case '\r':
  779. print(R"(\r)");
  780. break;
  781. case '\n':
  782. print(R"(\n)");
  783. break;
  784. case '\\':
  785. print(R"(\\)");
  786. break;
  787. case '"':
  788. print(R"(")");
  789. break;
  790. case '\'':
  791. print(R"(\')");
  792. break;
  793. default:
  794. if (isAsciiPrintable(CodePoint)) {
  795. char C = CodePoint;
  796. print(C);
  797. } else {
  798. print(R"(\u{)");
  799. print(HexDigits);
  800. print('}');
  801. }
  802. break;
  803. }
  804. print('\'');
  805. }
  806. // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
  807. Identifier Demangler::parseIdentifier() {
  808. bool Punycode = consumeIf('u');
  809. uint64_t Bytes = parseDecimalNumber();
  810. // Underscore resolves the ambiguity when identifier starts with a decimal
  811. // digit or another underscore.
  812. consumeIf('_');
  813. if (Error || Bytes > Input.size() - Position) {
  814. Error = true;
  815. return {};
  816. }
  817. StringView S = Input.substr(Position, Bytes);
  818. Position += Bytes;
  819. if (!std::all_of(S.begin(), S.end(), isValid)) {
  820. Error = true;
  821. return {};
  822. }
  823. return {S, Punycode};
  824. }
  825. // Parses optional base 62 number. The presence of a number is determined using
  826. // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
  827. //
  828. // This function is indended for parsing disambiguators and binders which when
  829. // not present have their value interpreted as 0, and otherwise as decoded
  830. // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
  831. // 2. When "G" is absent value is 0.
  832. uint64_t Demangler::parseOptionalBase62Number(char Tag) {
  833. if (!consumeIf(Tag))
  834. return 0;
  835. uint64_t N = parseBase62Number();
  836. if (Error || !addAssign(N, 1))
  837. return 0;
  838. return N;
  839. }
  840. // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
  841. // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
  842. // "1_" encodes 2, etc.
  843. //
  844. // <base-62-number> = {<0-9a-zA-Z>} "_"
  845. uint64_t Demangler::parseBase62Number() {
  846. if (consumeIf('_'))
  847. return 0;
  848. uint64_t Value = 0;
  849. while (true) {
  850. uint64_t Digit;
  851. char C = consume();
  852. if (C == '_') {
  853. break;
  854. } else if (isDigit(C)) {
  855. Digit = C - '0';
  856. } else if (isLower(C)) {
  857. Digit = 10 + (C - 'a');
  858. } else if (isUpper(C)) {
  859. Digit = 10 + 26 + (C - 'A');
  860. } else {
  861. Error = true;
  862. return 0;
  863. }
  864. if (!mulAssign(Value, 62))
  865. return 0;
  866. if (!addAssign(Value, Digit))
  867. return 0;
  868. }
  869. if (!addAssign(Value, 1))
  870. return 0;
  871. return Value;
  872. }
  873. // Parses a decimal number that had been encoded without any leading zeros.
  874. //
  875. // <decimal-number> = "0"
  876. // | <1-9> {<0-9>}
  877. uint64_t Demangler::parseDecimalNumber() {
  878. char C = look();
  879. if (!isDigit(C)) {
  880. Error = true;
  881. return 0;
  882. }
  883. if (C == '0') {
  884. consume();
  885. return 0;
  886. }
  887. uint64_t Value = 0;
  888. while (isDigit(look())) {
  889. if (!mulAssign(Value, 10)) {
  890. Error = true;
  891. return 0;
  892. }
  893. uint64_t D = consume() - '0';
  894. if (!addAssign(Value, D))
  895. return 0;
  896. }
  897. return Value;
  898. }
  899. // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
  900. // value and stores hex digits in HexDigits. The return value is unspecified if
  901. // HexDigits.size() > 16.
  902. //
  903. // <hex-number> = "0_"
  904. // | <1-9a-f> {<0-9a-f>} "_"
  905. uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
  906. size_t Start = Position;
  907. uint64_t Value = 0;
  908. if (!isHexDigit(look()))
  909. Error = true;
  910. if (consumeIf('0')) {
  911. if (!consumeIf('_'))
  912. Error = true;
  913. } else {
  914. while (!Error && !consumeIf('_')) {
  915. char C = consume();
  916. Value *= 16;
  917. if (isDigit(C))
  918. Value += C - '0';
  919. else if ('a' <= C && C <= 'f')
  920. Value += 10 + (C - 'a');
  921. else
  922. Error = true;
  923. }
  924. }
  925. if (Error) {
  926. HexDigits = StringView();
  927. return 0;
  928. }
  929. size_t End = Position - 1;
  930. assert(Start < End);
  931. HexDigits = Input.substr(Start, End - Start);
  932. return Value;
  933. }
  934. void Demangler::print(char C) {
  935. if (Error || !Print)
  936. return;
  937. Output += C;
  938. }
  939. void Demangler::print(StringView S) {
  940. if (Error || !Print)
  941. return;
  942. Output += S;
  943. }
  944. void Demangler::printDecimalNumber(uint64_t N) {
  945. if (Error || !Print)
  946. return;
  947. Output << N;
  948. }
  949. // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
  950. // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
  951. // bound by one of the enclosing binders.
  952. void Demangler::printLifetime(uint64_t Index) {
  953. if (Index == 0) {
  954. print("'_");
  955. return;
  956. }
  957. if (Index - 1 >= BoundLifetimes) {
  958. Error = true;
  959. return;
  960. }
  961. uint64_t Depth = BoundLifetimes - Index;
  962. print('\'');
  963. if (Depth < 26) {
  964. char C = 'a' + Depth;
  965. print(C);
  966. } else {
  967. print('z');
  968. printDecimalNumber(Depth - 26 + 1);
  969. }
  970. }
  971. static inline bool decodePunycodeDigit(char C, size_t &Value) {
  972. if (isLower(C)) {
  973. Value = C - 'a';
  974. return true;
  975. }
  976. if (isDigit(C)) {
  977. Value = 26 + (C - '0');
  978. return true;
  979. }
  980. return false;
  981. }
  982. static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
  983. char *Buffer = Output.getBuffer();
  984. char *Start = Buffer + StartIdx;
  985. char *End = Buffer + Output.getCurrentPosition();
  986. Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
  987. }
  988. // Encodes code point as UTF-8 and stores results in Output. Returns false if
  989. // CodePoint is not a valid unicode scalar value.
  990. static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  991. if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
  992. return false;
  993. if (CodePoint <= 0x7F) {
  994. Output[0] = CodePoint;
  995. return true;
  996. }
  997. if (CodePoint <= 0x7FF) {
  998. Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
  999. Output[1] = 0x80 | (CodePoint & 0x3F);
  1000. return true;
  1001. }
  1002. if (CodePoint <= 0xFFFF) {
  1003. Output[0] = 0xE0 | (CodePoint >> 12);
  1004. Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
  1005. Output[2] = 0x80 | (CodePoint & 0x3F);
  1006. return true;
  1007. }
  1008. if (CodePoint <= 0x10FFFF) {
  1009. Output[0] = 0xF0 | (CodePoint >> 18);
  1010. Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
  1011. Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
  1012. Output[3] = 0x80 | (CodePoint & 0x3F);
  1013. return true;
  1014. }
  1015. return false;
  1016. }
  1017. // Decodes string encoded using punycode and appends results to Output.
  1018. // Returns true if decoding was successful.
  1019. static bool decodePunycode(StringView Input, OutputBuffer &Output) {
  1020. size_t OutputSize = Output.getCurrentPosition();
  1021. size_t InputIdx = 0;
  1022. // Rust uses an underscore as a delimiter.
  1023. size_t DelimiterPos = StringView::npos;
  1024. for (size_t I = 0; I != Input.size(); ++I)
  1025. if (Input[I] == '_')
  1026. DelimiterPos = I;
  1027. if (DelimiterPos != StringView::npos) {
  1028. // Copy basic code points before the last delimiter to the output.
  1029. for (; InputIdx != DelimiterPos; ++InputIdx) {
  1030. char C = Input[InputIdx];
  1031. if (!isValid(C))
  1032. return false;
  1033. // Code points are padded with zeros while decoding is in progress.
  1034. char UTF8[4] = {C};
  1035. Output += StringView(UTF8, UTF8 + 4);
  1036. }
  1037. // Skip over the delimiter.
  1038. ++InputIdx;
  1039. }
  1040. size_t Base = 36;
  1041. size_t Skew = 38;
  1042. size_t Bias = 72;
  1043. size_t N = 0x80;
  1044. size_t TMin = 1;
  1045. size_t TMax = 26;
  1046. size_t Damp = 700;
  1047. auto Adapt = [&](size_t Delta, size_t NumPoints) {
  1048. Delta /= Damp;
  1049. Delta += Delta / NumPoints;
  1050. Damp = 2;
  1051. size_t K = 0;
  1052. while (Delta > (Base - TMin) * TMax / 2) {
  1053. Delta /= Base - TMin;
  1054. K += Base;
  1055. }
  1056. return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
  1057. };
  1058. // Main decoding loop.
  1059. for (size_t I = 0; InputIdx != Input.size(); I += 1) {
  1060. size_t OldI = I;
  1061. size_t W = 1;
  1062. size_t Max = std::numeric_limits<size_t>::max();
  1063. for (size_t K = Base; true; K += Base) {
  1064. if (InputIdx == Input.size())
  1065. return false;
  1066. char C = Input[InputIdx++];
  1067. size_t Digit = 0;
  1068. if (!decodePunycodeDigit(C, Digit))
  1069. return false;
  1070. if (Digit > (Max - I) / W)
  1071. return false;
  1072. I += Digit * W;
  1073. size_t T;
  1074. if (K <= Bias)
  1075. T = TMin;
  1076. else if (K >= Bias + TMax)
  1077. T = TMax;
  1078. else
  1079. T = K - Bias;
  1080. if (Digit < T)
  1081. break;
  1082. if (W > Max / (Base - T))
  1083. return false;
  1084. W *= (Base - T);
  1085. }
  1086. size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
  1087. Bias = Adapt(I - OldI, NumPoints);
  1088. if (I / NumPoints > Max - N)
  1089. return false;
  1090. N += I / NumPoints;
  1091. I = I % NumPoints;
  1092. // Insert N at position I in the output.
  1093. char UTF8[4] = {};
  1094. if (!encodeUTF8(N, UTF8))
  1095. return false;
  1096. Output.insert(OutputSize + I * 4, UTF8, 4);
  1097. }
  1098. removeNullBytes(Output, OutputSize);
  1099. return true;
  1100. }
  1101. void Demangler::printIdentifier(Identifier Ident) {
  1102. if (Error || !Print)
  1103. return;
  1104. if (Ident.Punycode) {
  1105. if (!decodePunycode(Ident.Name, Output))
  1106. Error = true;
  1107. } else {
  1108. print(Ident.Name);
  1109. }
  1110. }
  1111. char Demangler::look() const {
  1112. if (Error || Position >= Input.size())
  1113. return 0;
  1114. return Input[Position];
  1115. }
  1116. char Demangler::consume() {
  1117. if (Error || Position >= Input.size()) {
  1118. Error = true;
  1119. return 0;
  1120. }
  1121. return Input[Position++];
  1122. }
  1123. bool Demangler::consumeIf(char Prefix) {
  1124. if (Error || Position >= Input.size() || Input[Position] != Prefix)
  1125. return false;
  1126. Position += 1;
  1127. return true;
  1128. }
  1129. /// Computes A + B. When computation wraps around sets the error and returns
  1130. /// false. Otherwise assigns the result to A and returns true.
  1131. bool Demangler::addAssign(uint64_t &A, uint64_t B) {
  1132. if (A > std::numeric_limits<uint64_t>::max() - B) {
  1133. Error = true;
  1134. return false;
  1135. }
  1136. A += B;
  1137. return true;
  1138. }
  1139. /// Computes A * B. When computation wraps around sets the error and returns
  1140. /// false. Otherwise assigns the result to A and returns true.
  1141. bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
  1142. if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
  1143. Error = true;
  1144. return false;
  1145. }
  1146. A *= B;
  1147. return true;
  1148. }