demangle_rust.cc 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
  1. // Copyright 2024 The Abseil Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/debugging/internal/demangle_rust.h"
  15. #include <cstddef>
  16. #include <cstdint>
  17. #include <cstring>
  18. #include <limits>
  19. #include "absl/base/attributes.h"
  20. #include "absl/base/config.h"
  21. #include "absl/debugging/internal/decode_rust_punycode.h"
  22. namespace absl {
  23. ABSL_NAMESPACE_BEGIN
  24. namespace debugging_internal {
  25. namespace {
  26. // Same step limit as the C++ demangler in demangle.cc uses.
  27. constexpr int kMaxReturns = 1 << 17;
  28. bool IsDigit(char c) { return '0' <= c && c <= '9'; }
  29. bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
  30. bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
  31. bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
  32. bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
  33. bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }
  34. const char* BasicTypeName(char c) {
  35. switch (c) {
  36. case 'a': return "i8";
  37. case 'b': return "bool";
  38. case 'c': return "char";
  39. case 'd': return "f64";
  40. case 'e': return "str";
  41. case 'f': return "f32";
  42. case 'h': return "u8";
  43. case 'i': return "isize";
  44. case 'j': return "usize";
  45. case 'l': return "i32";
  46. case 'm': return "u32";
  47. case 'n': return "i128";
  48. case 'o': return "u128";
  49. case 'p': return "_";
  50. case 's': return "i16";
  51. case 't': return "u16";
  52. case 'u': return "()";
  53. case 'v': return "...";
  54. case 'x': return "i64";
  55. case 'y': return "u64";
  56. case 'z': return "!";
  57. }
  58. return nullptr;
  59. }
  60. // Parser for Rust symbol mangling v0, whose grammar is defined here:
  61. //
  62. // https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
  63. class RustSymbolParser {
  64. public:
  65. // Prepares to demangle the given encoding, a Rust symbol name starting with
  66. // _R, into the output buffer [out, out_end). The caller is expected to
  67. // continue by calling the new object's Parse function.
  68. RustSymbolParser(const char* encoding, char* out, char* const out_end)
  69. : encoding_(encoding), out_(out), out_end_(out_end) {
  70. if (out_ != out_end_) *out_ = '\0';
  71. }
  72. // Parses the constructor's encoding argument, writing output into the range
  73. // [out, out_end). Returns true on success and false for input whose
  74. // structure was not recognized or exceeded implementation limits, such as by
  75. // nesting structures too deep. In either case *this should not be used
  76. // again.
  77. ABSL_MUST_USE_RESULT bool Parse() && {
  78. // Recursively parses the grammar production named by callee, then resumes
  79. // execution at the next statement.
  80. //
  81. // Recursive-descent parsing is a beautifully readable translation of a
  82. // grammar, but it risks stack overflow if implemented by naive recursion on
  83. // the C++ call stack. So we simulate recursion by goto and switch instead,
  84. // keeping a bounded stack of "return addresses" in the recursion_stack_
  85. // member.
  86. //
  87. // The callee argument is a statement label. We goto that label after
  88. // saving the "return address" on recursion_stack_. The next continue
  89. // statement in the for loop below "returns" from this "call".
  90. //
  91. // The caller argument names the return point. Each value of caller must
  92. // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
  93. // definition of enum ReturnAddress. The switch implements the control
  94. // transfer from the end of a "called" subroutine back to the statement
  95. // after the "call".
  96. //
  97. // Note that not all the grammar productions have to be packed into the
  98. // switch, but only those which appear in a cycle in the grammar. Anything
  99. // acyclic can be written as ordinary functions and function calls, e.g.,
  100. // ParseIdentifier.
  101. #define ABSL_DEMANGLER_RECURSE(callee, caller) \
  102. do { \
  103. if (recursion_depth_ == kStackSize) return false; \
  104. /* The next continue will switch on this saved value ... */ \
  105. recursion_stack_[recursion_depth_++] = caller; \
  106. goto callee; \
  107. /* ... and will land here, resuming the suspended code. */ \
  108. case caller: {} \
  109. } while (0)
  110. // Parse the encoding, counting completed recursive calls to guard against
  111. // excessively complex input and infinite-loop bugs.
  112. int iter = 0;
  113. goto whole_encoding;
  114. for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
  115. // This switch resumes the code path most recently suspended by
  116. // ABSL_DEMANGLER_RECURSE.
  117. switch (recursion_stack_[--recursion_depth_]) {
  118. //
  119. // symbol-name ->
  120. // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
  121. whole_encoding:
  122. if (!Eat('_') || !Eat('R')) return false;
  123. // decimal-number? is always empty today, so proceed to path, which
  124. // can't start with a decimal digit.
  125. ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
  126. if (IsAlpha(Peek())) {
  127. ++silence_depth_; // Print nothing more from here on.
  128. ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
  129. }
  130. switch (Take()) {
  131. case '.': case '$': case '\0': return true;
  132. }
  133. return false; // unexpected trailing content
  134. // path -> crate-root | inherent-impl | trait-impl | trait-definition |
  135. // nested-path | generic-args | backref
  136. //
  137. // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
  138. // (which would hide the generated case label). Thus we jump out of the
  139. // inner switch with gotos before performing any fake recursion.
  140. path:
  141. switch (Take()) {
  142. case 'C': goto crate_root;
  143. case 'M': goto inherent_impl;
  144. case 'X': goto trait_impl;
  145. case 'Y': goto trait_definition;
  146. case 'N': goto nested_path;
  147. case 'I': goto generic_args;
  148. case 'B': goto path_backref;
  149. default: return false;
  150. }
  151. // crate-root -> C identifier (C consumed above)
  152. crate_root:
  153. if (!ParseIdentifier()) return false;
  154. continue;
  155. // inherent-impl -> M impl-path type (M already consumed)
  156. inherent_impl:
  157. if (!Emit("<")) return false;
  158. ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
  159. ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
  160. if (!Emit(">")) return false;
  161. continue;
  162. // trait-impl -> X impl-path type path (X already consumed)
  163. trait_impl:
  164. if (!Emit("<")) return false;
  165. ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
  166. ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
  167. if (!Emit(" as ")) return false;
  168. ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
  169. if (!Emit(">")) return false;
  170. continue;
  171. // impl-path -> disambiguator? path (but never print it!)
  172. impl_path:
  173. ++silence_depth_;
  174. {
  175. int ignored_disambiguator;
  176. if (!ParseDisambiguator(ignored_disambiguator)) return false;
  177. }
  178. ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
  179. --silence_depth_;
  180. continue;
  181. // trait-definition -> Y type path (Y already consumed)
  182. trait_definition:
  183. if (!Emit("<")) return false;
  184. ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
  185. if (!Emit(" as ")) return false;
  186. ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
  187. if (!Emit(">")) return false;
  188. continue;
  189. // nested-path -> N namespace path identifier (N already consumed)
  190. // namespace -> lower | upper
  191. nested_path:
  192. // Uppercase namespaces must be saved on a stack so we can print
  193. // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
  194. if (IsUpper(Peek())) {
  195. if (!PushNamespace(Take())) return false;
  196. ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
  197. if (!Emit("::")) return false;
  198. if (!ParseIdentifier(PopNamespace())) return false;
  199. continue;
  200. }
  201. // Lowercase namespaces, however, are never represented in the output;
  202. // they all emit just ::name.
  203. if (IsLower(Take())) {
  204. ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
  205. if (!Emit("::")) return false;
  206. if (!ParseIdentifier()) return false;
  207. continue;
  208. }
  209. // Neither upper or lower
  210. return false;
  211. // type -> basic-type | array-type | slice-type | tuple-type |
  212. // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
  213. // fn-type | dyn-trait-type | path | backref
  214. //
  215. // We use ifs instead of switch (Take()) because the default case jumps
  216. // to path, which will need to see the first character not yet Taken
  217. // from the input. Because we do not use a nested switch here,
  218. // ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
  219. type:
  220. if (IsLower(Peek())) {
  221. const char* type_name = BasicTypeName(Take());
  222. if (type_name == nullptr || !Emit(type_name)) return false;
  223. continue;
  224. }
  225. if (Eat('A')) {
  226. // array-type = A type const
  227. if (!Emit("[")) return false;
  228. ABSL_DEMANGLER_RECURSE(type, kArraySize);
  229. if (!Emit("; ")) return false;
  230. ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
  231. if (!Emit("]")) return false;
  232. continue;
  233. }
  234. if (Eat('S')) {
  235. if (!Emit("[")) return false;
  236. ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
  237. if (!Emit("]")) return false;
  238. continue;
  239. }
  240. if (Eat('T')) goto tuple_type;
  241. if (Eat('R')) {
  242. if (!Emit("&")) return false;
  243. if (!ParseOptionalLifetime()) return false;
  244. goto type;
  245. }
  246. if (Eat('Q')) {
  247. if (!Emit("&mut ")) return false;
  248. if (!ParseOptionalLifetime()) return false;
  249. goto type;
  250. }
  251. if (Eat('P')) {
  252. if (!Emit("*const ")) return false;
  253. goto type;
  254. }
  255. if (Eat('O')) {
  256. if (!Emit("*mut ")) return false;
  257. goto type;
  258. }
  259. if (Eat('F')) goto fn_type;
  260. if (Eat('D')) goto dyn_trait_type;
  261. if (Eat('B')) goto type_backref;
  262. goto path;
  263. // tuple-type -> T type* E (T already consumed)
  264. tuple_type:
  265. if (!Emit("(")) return false;
  266. // The toolchain should call the unit type u instead of TE, but the
  267. // grammar and other demanglers also recognize TE, so we do too.
  268. if (Eat('E')) {
  269. if (!Emit(")")) return false;
  270. continue;
  271. }
  272. // A tuple with one element is rendered (type,) instead of (type).
  273. ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
  274. if (Eat('E')) {
  275. if (!Emit(",)")) return false;
  276. continue;
  277. }
  278. // A tuple with two elements is of course (x, y).
  279. if (!Emit(", ")) return false;
  280. ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
  281. if (Eat('E')) {
  282. if (!Emit(")")) return false;
  283. continue;
  284. }
  285. // And (x, y, z) for three elements.
  286. if (!Emit(", ")) return false;
  287. ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
  288. if (Eat('E')) {
  289. if (!Emit(")")) return false;
  290. continue;
  291. }
  292. // For longer tuples we write (x, y, z, ...), printing none of the
  293. // content of the fourth and later types. Thus we avoid exhausting
  294. // output buffers and human readers' patience when some library has a
  295. // long tuple as an implementation detail, without having to
  296. // completely obfuscate all tuples.
  297. if (!Emit(", ...)")) return false;
  298. ++silence_depth_;
  299. while (!Eat('E')) {
  300. ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
  301. }
  302. --silence_depth_;
  303. continue;
  304. // fn-type -> F fn-sig (F already consumed)
  305. // fn-sig -> binder? U? (K abi)? type* E type
  306. // abi -> C | undisambiguated-identifier
  307. //
  308. // We follow the C++ demangler in suppressing details of function
  309. // signatures. Every function type is rendered "fn...".
  310. fn_type:
  311. if (!Emit("fn...")) return false;
  312. ++silence_depth_;
  313. if (!ParseOptionalBinder()) return false;
  314. (void)Eat('U');
  315. if (Eat('K')) {
  316. if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
  317. }
  318. while (!Eat('E')) {
  319. ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
  320. }
  321. ABSL_DEMANGLER_RECURSE(type, kFinishFn);
  322. --silence_depth_;
  323. continue;
  324. // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
  325. // dyn-bounds -> binder? dyn-trait* E
  326. //
  327. // The grammar strangely allows an empty trait list, even though the
  328. // compiler should never output one. We follow existing demanglers in
  329. // rendering DEL_ as "dyn ".
  330. //
  331. // Because auto traits lengthen a type name considerably without
  332. // providing much value to a search for related source code, it would be
  333. // desirable to abbreviate
  334. // dyn main::Trait + std::marker::Copy + std::marker::Send
  335. // to
  336. // dyn main::Trait + ...,
  337. // eliding the auto traits. But it is difficult to do so correctly, in
  338. // part because there is no guarantee that the mangling will list the
  339. // main trait first. So we just print all the traits in their order of
  340. // appearance in the mangled name.
  341. dyn_trait_type:
  342. if (!Emit("dyn ")) return false;
  343. if (!ParseOptionalBinder()) return false;
  344. if (!Eat('E')) {
  345. ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
  346. while (!Eat('E')) {
  347. if (!Emit(" + ")) return false;
  348. ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
  349. }
  350. }
  351. if (!ParseRequiredLifetime()) return false;
  352. continue;
  353. // dyn-trait -> path dyn-trait-assoc-binding*
  354. // dyn-trait-assoc-binding -> p undisambiguated-identifier type
  355. //
  356. // We render nonempty binding lists as <>, omitting their contents as
  357. // for generic-args.
  358. dyn_trait:
  359. ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
  360. if (Peek() == 'p') {
  361. if (!Emit("<>")) return false;
  362. ++silence_depth_;
  363. while (Eat('p')) {
  364. if (!ParseUndisambiguatedIdentifier()) return false;
  365. ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
  366. }
  367. --silence_depth_;
  368. }
  369. continue;
  370. // const -> type const-data | p | backref
  371. //
  372. // const is a C++ keyword, so we use the label `constant` instead.
  373. constant:
  374. if (Eat('B')) goto const_backref;
  375. if (Eat('p')) {
  376. if (!Emit("_")) return false;
  377. continue;
  378. }
  379. // Scan the type without printing it.
  380. //
  381. // The Rust language restricts the type of a const generic argument
  382. // much more than the mangling grammar does. We do not enforce this.
  383. //
  384. // We also do not bother printing false, true, 'A', and '\u{abcd}' for
  385. // the types bool and char. Because we do not print generic-args
  386. // contents, we expect to print constants only in array sizes, and
  387. // those should not be bool or char.
  388. ++silence_depth_;
  389. ABSL_DEMANGLER_RECURSE(type, kConstData);
  390. --silence_depth_;
  391. // const-data -> n? hex-digit* _
  392. //
  393. // Although the grammar doesn't say this, existing demanglers expect
  394. // that zero is 0, not an empty digit sequence, and no nonzero value
  395. // may have leading zero digits. Also n0_ is accepted and printed as
  396. // -0, though a toolchain will probably never write that encoding.
  397. if (Eat('n') && !EmitChar('-')) return false;
  398. if (!Emit("0x")) return false;
  399. if (Eat('0')) {
  400. if (!EmitChar('0')) return false;
  401. if (!Eat('_')) return false;
  402. continue;
  403. }
  404. while (IsLowerHexDigit(Peek())) {
  405. if (!EmitChar(Take())) return false;
  406. }
  407. if (!Eat('_')) return false;
  408. continue;
  409. // generic-args -> I path generic-arg* E (I already consumed)
  410. //
  411. // We follow the C++ demangler in omitting all the arguments from the
  412. // output, printing only the list opening and closing tokens.
  413. generic_args:
  414. ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
  415. if (!Emit("::<>")) return false;
  416. ++silence_depth_;
  417. while (!Eat('E')) {
  418. ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
  419. }
  420. --silence_depth_;
  421. continue;
  422. // generic-arg -> lifetime | type | K const
  423. generic_arg:
  424. if (Peek() == 'L') {
  425. if (!ParseOptionalLifetime()) return false;
  426. continue;
  427. }
  428. if (Eat('K')) goto constant;
  429. goto type;
  430. // backref -> B base-62-number (B already consumed)
  431. //
  432. // The BeginBackref call parses and range-checks the base-62-number. We
  433. // always do that much.
  434. //
  435. // The recursive call parses and prints what the backref points at. We
  436. // save CPU and stack by skipping this work if the output would be
  437. // suppressed anyway.
  438. path_backref:
  439. if (!BeginBackref()) return false;
  440. if (silence_depth_ == 0) {
  441. ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
  442. }
  443. EndBackref();
  444. continue;
  445. // This represents the same backref production as in path_backref but
  446. // parses the target as a type instead of a path.
  447. type_backref:
  448. if (!BeginBackref()) return false;
  449. if (silence_depth_ == 0) {
  450. ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
  451. }
  452. EndBackref();
  453. continue;
  454. const_backref:
  455. if (!BeginBackref()) return false;
  456. if (silence_depth_ == 0) {
  457. ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
  458. }
  459. EndBackref();
  460. continue;
  461. }
  462. }
  463. return false; // hit iteration limit or a bug in our stack handling
  464. }
  465. private:
  466. // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
  467. enum ReturnAddress : uint8_t {
  468. kInstantiatingCrate,
  469. kVendorSpecificSuffix,
  470. kIdentifierInUppercaseNamespace,
  471. kIdentifierInLowercaseNamespace,
  472. kInherentImplType,
  473. kInherentImplEnding,
  474. kTraitImplType,
  475. kTraitImplInfix,
  476. kTraitImplEnding,
  477. kImplPathEnding,
  478. kTraitDefinitionInfix,
  479. kTraitDefinitionEnding,
  480. kArraySize,
  481. kFinishArray,
  482. kSliceEnding,
  483. kAfterFirstTupleElement,
  484. kAfterSecondTupleElement,
  485. kAfterThirdTupleElement,
  486. kAfterSubsequentTupleElement,
  487. kContinueParameterList,
  488. kFinishFn,
  489. kBeginAutoTraits,
  490. kContinueAutoTraits,
  491. kContinueDynTrait,
  492. kContinueAssocBinding,
  493. kConstData,
  494. kBeginGenericArgList,
  495. kContinueGenericArgList,
  496. kPathBackrefEnding,
  497. kTypeBackrefEnding,
  498. kConstantBackrefEnding,
  499. };
  500. // Element counts for the stack arrays. Larger stack sizes accommodate more
  501. // deeply nested names at the cost of a larger footprint on the C++ call
  502. // stack.
  503. enum {
  504. // Maximum recursive calls outstanding at one time.
  505. kStackSize = 256,
  506. // Maximum N<uppercase> nested-paths open at once. We do not expect
  507. // closures inside closures inside closures as much as functions inside
  508. // modules inside other modules, so we can use a smaller array here.
  509. kNamespaceStackSize = 64,
  510. // Maximum number of nested backrefs. We can keep this stack pretty small
  511. // because we do not follow backrefs inside generic-args or other contexts
  512. // that suppress printing, so deep stacking is unlikely in practice.
  513. kPositionStackSize = 16,
  514. };
  515. // Returns the next input character without consuming it.
  516. char Peek() const { return encoding_[pos_]; }
  517. // Consumes and returns the next input character.
  518. char Take() { return encoding_[pos_++]; }
  519. // If the next input character is the given character, consumes it and returns
  520. // true; otherwise returns false without consuming a character.
  521. ABSL_MUST_USE_RESULT bool Eat(char want) {
  522. if (encoding_[pos_] != want) return false;
  523. ++pos_;
  524. return true;
  525. }
  526. // Provided there is enough remaining output space, appends c to the output,
  527. // writing a fresh NUL terminator afterward, and returns true. Returns false
  528. // if the output buffer had less than two bytes free.
  529. ABSL_MUST_USE_RESULT bool EmitChar(char c) {
  530. if (silence_depth_ > 0) return true;
  531. if (out_end_ - out_ < 2) return false;
  532. *out_++ = c;
  533. *out_ = '\0';
  534. return true;
  535. }
  536. // Provided there is enough remaining output space, appends the C string token
  537. // to the output, followed by a NUL character, and returns true. Returns
  538. // false if not everything fit into the output buffer.
  539. ABSL_MUST_USE_RESULT bool Emit(const char* token) {
  540. if (silence_depth_ > 0) return true;
  541. const size_t token_length = std::strlen(token);
  542. const size_t bytes_to_copy = token_length + 1; // token and final NUL
  543. if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
  544. std::memcpy(out_, token, bytes_to_copy);
  545. out_ += token_length;
  546. return true;
  547. }
  548. // Provided there is enough remaining output space, appends the decimal form
  549. // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
  550. // output, followed by a NUL character, and returns true. Returns false if
  551. // not everything fit into the output buffer.
  552. ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
  553. if (disambiguator < 0) return EmitChar('?'); // parsed but too large
  554. if (disambiguator == 0) return EmitChar('0');
  555. // Convert disambiguator to decimal text. Three digits per byte is enough
  556. // because 999 > 256. The bound will remain correct even if future
  557. // maintenance changes the type of the disambiguator variable.
  558. char digits[3 * sizeof(disambiguator)] = {};
  559. size_t leading_digit_index = sizeof(digits) - 1;
  560. for (; disambiguator > 0; disambiguator /= 10) {
  561. digits[--leading_digit_index] =
  562. static_cast<char>('0' + disambiguator % 10);
  563. }
  564. return Emit(digits + leading_digit_index);
  565. }
  566. // Consumes an optional disambiguator (s123_) from the input.
  567. //
  568. // On success returns true and fills value with the encoded value if it was
  569. // not too big, otherwise with -1. If the optional disambiguator was omitted,
  570. // value is 0. On parse failure returns false and sets value to -1.
  571. ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
  572. value = -1;
  573. // disambiguator = s base-62-number
  574. //
  575. // Disambiguators are optional. An omitted disambiguator is zero.
  576. if (!Eat('s')) {
  577. value = 0;
  578. return true;
  579. }
  580. int base_62_value = 0;
  581. if (!ParseBase62Number(base_62_value)) return false;
  582. value = base_62_value < 0 ? -1 : base_62_value + 1;
  583. return true;
  584. }
  585. // Consumes a base-62 number like _ or 123_ from the input.
  586. //
  587. // On success returns true and fills value with the encoded value if it was
  588. // not too big, otherwise with -1. On parse failure returns false and sets
  589. // value to -1.
  590. ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
  591. value = -1;
  592. // base-62-number = (digit | lower | upper)* _
  593. //
  594. // An empty base-62 digit sequence means 0.
  595. if (Eat('_')) {
  596. value = 0;
  597. return true;
  598. }
  599. // A nonempty digit sequence denotes its base-62 value plus 1.
  600. int encoded_number = 0;
  601. bool overflowed = false;
  602. while (IsAlpha(Peek()) || IsDigit(Peek())) {
  603. const char c = Take();
  604. if (encoded_number >= std::numeric_limits<int>::max()/62) {
  605. // If we are close to overflowing an int, keep parsing but stop updating
  606. // encoded_number and remember to return -1 at the end. The point is to
  607. // avoid undefined behavior while parsing crate-root disambiguators,
  608. // which are large in practice but not shown in demangling, while
  609. // successfully computing closure and shim disambiguators, which are
  610. // typically small and are printed out.
  611. overflowed = true;
  612. } else {
  613. int digit;
  614. if (IsDigit(c)) {
  615. digit = c - '0';
  616. } else if (IsLower(c)) {
  617. digit = c - 'a' + 10;
  618. } else {
  619. digit = c - 'A' + 36;
  620. }
  621. encoded_number = 62 * encoded_number + digit;
  622. }
  623. }
  624. if (!Eat('_')) return false;
  625. if (!overflowed) value = encoded_number + 1;
  626. return true;
  627. }
  628. // Consumes an identifier from the input, returning true on success.
  629. //
  630. // A nonzero uppercase_namespace specifies the character after the N in a
  631. // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
  632. // write out the name with the conventional decoration for that namespace.
  633. ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
  634. // identifier -> disambiguator? undisambiguated-identifier
  635. int disambiguator = 0;
  636. if (!ParseDisambiguator(disambiguator)) return false;
  637. return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
  638. }
  639. // Consumes from the input an identifier with no preceding disambiguator,
  640. // returning true on success.
  641. //
  642. // When ParseIdentifier calls this, it passes the N<namespace> character and
  643. // disambiguator value so that "{closure#42}" and similar forms can be
  644. // rendered correctly.
  645. //
  646. // At other appearances of undisambiguated-identifier in the grammar, this
  647. // treatment is not applicable, and the call site omits both arguments.
  648. ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
  649. char uppercase_namespace = '\0', int disambiguator = 0) {
  650. // undisambiguated-identifier -> u? decimal-number _? bytes
  651. const bool is_punycoded = Eat('u');
  652. if (!IsDigit(Peek())) return false;
  653. int num_bytes = 0;
  654. if (!ParseDecimalNumber(num_bytes)) return false;
  655. (void)Eat('_'); // optional separator, needed if a digit follows
  656. if (is_punycoded) {
  657. DecodeRustPunycodeOptions options;
  658. options.punycode_begin = &encoding_[pos_];
  659. options.punycode_end = &encoding_[pos_] + num_bytes;
  660. options.out_begin = out_;
  661. options.out_end = out_end_;
  662. out_ = DecodeRustPunycode(options);
  663. if (out_ == nullptr) return false;
  664. pos_ += static_cast<size_t>(num_bytes);
  665. }
  666. // Emit the beginnings of braced forms like {shim:vtable#0}.
  667. if (uppercase_namespace != '\0') {
  668. switch (uppercase_namespace) {
  669. case 'C':
  670. if (!Emit("{closure")) return false;
  671. break;
  672. case 'S':
  673. if (!Emit("{shim")) return false;
  674. break;
  675. default:
  676. if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
  677. break;
  678. }
  679. if (num_bytes > 0 && !Emit(":")) return false;
  680. }
  681. // Emit the name itself.
  682. if (!is_punycoded) {
  683. for (int i = 0; i < num_bytes; ++i) {
  684. const char c = Take();
  685. if (!IsIdentifierChar(c) &&
  686. // The spec gives toolchains the choice of Punycode or raw UTF-8 for
  687. // identifiers containing code points above 0x7f, so accept bytes
  688. // with the high bit set.
  689. (c & 0x80) == 0) {
  690. return false;
  691. }
  692. if (!EmitChar(c)) return false;
  693. }
  694. }
  695. // Emit the endings of braced forms, e.g., "#42}".
  696. if (uppercase_namespace != '\0') {
  697. if (!EmitChar('#')) return false;
  698. if (!EmitDisambiguator(disambiguator)) return false;
  699. if (!EmitChar('}')) return false;
  700. }
  701. return true;
  702. }
  703. // Consumes a decimal number like 0 or 123 from the input. On success returns
  704. // true and fills value with the encoded value. If the encoded value is too
  705. // large or otherwise unparsable, returns false and sets value to -1.
  706. ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
  707. value = -1;
  708. if (!IsDigit(Peek())) return false;
  709. int encoded_number = Take() - '0';
  710. if (encoded_number == 0) {
  711. // Decimal numbers are never encoded with extra leading zeroes.
  712. value = 0;
  713. return true;
  714. }
  715. while (IsDigit(Peek()) &&
  716. // avoid overflow
  717. encoded_number < std::numeric_limits<int>::max()/10) {
  718. encoded_number = 10 * encoded_number + (Take() - '0');
  719. }
  720. if (IsDigit(Peek())) return false; // too big
  721. value = encoded_number;
  722. return true;
  723. }
  724. // Consumes a binder of higher-ranked lifetimes if one is present. On success
  725. // returns true and discards the encoded lifetime count. On parse failure
  726. // returns false.
  727. ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
  728. // binder -> G base-62-number
  729. if (!Eat('G')) return true;
  730. int ignored_binding_count;
  731. return ParseBase62Number(ignored_binding_count);
  732. }
  733. // Consumes a lifetime if one is present.
  734. //
  735. // On success returns true and discards the lifetime index. We do not print
  736. // or even range-check lifetimes because they are a finer detail than other
  737. // things we omit from output, such as the entire contents of generic-args.
  738. //
  739. // On parse failure returns false.
  740. ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
  741. // lifetime -> L base-62-number
  742. if (!Eat('L')) return true;
  743. int ignored_de_bruijn_index;
  744. return ParseBase62Number(ignored_de_bruijn_index);
  745. }
  746. // Consumes a lifetime just like ParseOptionalLifetime, but returns false if
  747. // there is no lifetime here.
  748. ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
  749. if (Peek() != 'L') return false;
  750. return ParseOptionalLifetime();
  751. }
  752. // Pushes ns onto the namespace stack and returns true if the stack is not
  753. // full, else returns false.
  754. ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
  755. if (namespace_depth_ == kNamespaceStackSize) return false;
  756. namespace_stack_[namespace_depth_++] = ns;
  757. return true;
  758. }
  759. // Pops the last pushed namespace. Requires that the namespace stack is not
  760. // empty (namespace_depth_ > 0).
  761. char PopNamespace() { return namespace_stack_[--namespace_depth_]; }
  762. // Pushes position onto the position stack and returns true if the stack is
  763. // not full, else returns false.
  764. ABSL_MUST_USE_RESULT bool PushPosition(int position) {
  765. if (position_depth_ == kPositionStackSize) return false;
  766. position_stack_[position_depth_++] = position;
  767. return true;
  768. }
  769. // Pops the last pushed input position. Requires that the position stack is
  770. // not empty (position_depth_ > 0).
  771. int PopPosition() { return position_stack_[--position_depth_]; }
  772. // Consumes a base-62-number denoting a backref target, pushes the current
  773. // input position on the data stack, and sets the input position to the
  774. // beginning of the backref target. Returns true on success. Returns false
  775. // if parsing failed, the stack is exhausted, or the backref target position
  776. // is out of range.
  777. ABSL_MUST_USE_RESULT bool BeginBackref() {
  778. // backref = B base-62-number (B already consumed)
  779. //
  780. // Reject backrefs that don't parse, overflow int, or don't point backward.
  781. // If the offset looks fine, adjust it to account for the _R prefix.
  782. int offset = 0;
  783. const int offset_of_this_backref =
  784. pos_ - 2 /* _R */ - 1 /* B already consumed */;
  785. if (!ParseBase62Number(offset) || offset < 0 ||
  786. offset >= offset_of_this_backref) {
  787. return false;
  788. }
  789. offset += 2;
  790. // Save the old position to restore later.
  791. if (!PushPosition(pos_)) return false;
  792. // Move the input position to the backref target.
  793. //
  794. // Note that we do not check whether the new position points to the
  795. // beginning of a construct matching the context in which the backref
  796. // appeared. We just jump to it and see whether nested parsing succeeds.
  797. // We therefore accept various wrong manglings, e.g., a type backref
  798. // pointing to an 'l' character inside an identifier, which happens to mean
  799. // i32 when parsed as a type mangling. This saves the complexity and RAM
  800. // footprint of remembering which offsets began which kinds of
  801. // substructures. Existing demanglers take similar shortcuts.
  802. pos_ = offset;
  803. return true;
  804. }
  805. // Cleans up after a backref production by restoring the previous input
  806. // position from the data stack.
  807. void EndBackref() { pos_ = PopPosition(); }
  808. // The leftmost recursion_depth_ elements of recursion_stack_ contain the
  809. // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
  810. ReturnAddress recursion_stack_[kStackSize] = {};
  811. int recursion_depth_ = 0;
  812. // The leftmost namespace_depth_ elements of namespace_stack_ contain the
  813. // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
  814. // closure.
  815. char namespace_stack_[kNamespaceStackSize] = {};
  816. int namespace_depth_ = 0;
  817. // The leftmost position_depth_ elements of position_stack_ contain the input
  818. // positions to return to after fully printing the targets of backrefs.
  819. int position_stack_[kPositionStackSize] = {};
  820. int position_depth_ = 0;
  821. // Anything parsed while silence_depth_ > 0 contributes nothing to the
  822. // demangled output. For constructs omitted from the demangling, such as
  823. // impl-path and the contents of generic-args, we will increment
  824. // silence_depth_ on the way in and decrement silence_depth_ on the way out.
  825. int silence_depth_ = 0;
  826. // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
  827. // the next input character to be scanned.
  828. int pos_ = 0;
  829. const char* encoding_ = nullptr;
  830. // Output: *out_ is where the next output character should be written, and
  831. // out_end_ points past the last byte of available space.
  832. char* out_ = nullptr;
  833. char* out_end_ = nullptr;
  834. };
  835. } // namespace
  836. bool DemangleRustSymbolEncoding(const char* mangled, char* out,
  837. size_t out_size) {
  838. return RustSymbolParser(mangled, out, out + out_size).Parse();
  839. }
  840. } // namespace debugging_internal
  841. ABSL_NAMESPACE_END
  842. } // namespace absl