demangle.cc 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988
  1. // Copyright 2018 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // For reference check out:
  15. // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
  16. //
  17. // Note that we only have partial C++11 support yet.
  18. #include "y_absl/debugging/internal/demangle.h"
  19. #include <cstdint>
  20. #include <cstdio>
  21. #include <limits>
  22. namespace y_absl {
  23. Y_ABSL_NAMESPACE_BEGIN
  24. namespace debugging_internal {
  25. typedef struct {
  26. const char *abbrev;
  27. const char *real_name;
  28. // Number of arguments in <expression> context, or 0 if disallowed.
  29. int arity;
  30. } AbbrevPair;
  31. // List of operators from Itanium C++ ABI.
  32. static const AbbrevPair kOperatorList[] = {
  33. // New has special syntax (not currently supported).
  34. {"nw", "new", 0},
  35. {"na", "new[]", 0},
  36. // Works except that the 'gs' prefix is not supported.
  37. {"dl", "delete", 1},
  38. {"da", "delete[]", 1},
  39. {"ps", "+", 1}, // "positive"
  40. {"ng", "-", 1}, // "negative"
  41. {"ad", "&", 1}, // "address-of"
  42. {"de", "*", 1}, // "dereference"
  43. {"co", "~", 1},
  44. {"pl", "+", 2},
  45. {"mi", "-", 2},
  46. {"ml", "*", 2},
  47. {"dv", "/", 2},
  48. {"rm", "%", 2},
  49. {"an", "&", 2},
  50. {"or", "|", 2},
  51. {"eo", "^", 2},
  52. {"aS", "=", 2},
  53. {"pL", "+=", 2},
  54. {"mI", "-=", 2},
  55. {"mL", "*=", 2},
  56. {"dV", "/=", 2},
  57. {"rM", "%=", 2},
  58. {"aN", "&=", 2},
  59. {"oR", "|=", 2},
  60. {"eO", "^=", 2},
  61. {"ls", "<<", 2},
  62. {"rs", ">>", 2},
  63. {"lS", "<<=", 2},
  64. {"rS", ">>=", 2},
  65. {"eq", "==", 2},
  66. {"ne", "!=", 2},
  67. {"lt", "<", 2},
  68. {"gt", ">", 2},
  69. {"le", "<=", 2},
  70. {"ge", ">=", 2},
  71. {"nt", "!", 1},
  72. {"aa", "&&", 2},
  73. {"oo", "||", 2},
  74. {"pp", "++", 1},
  75. {"mm", "--", 1},
  76. {"cm", ",", 2},
  77. {"pm", "->*", 2},
  78. {"pt", "->", 0}, // Special syntax
  79. {"cl", "()", 0}, // Special syntax
  80. {"ix", "[]", 2},
  81. {"qu", "?", 3},
  82. {"st", "sizeof", 0}, // Special syntax
  83. {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
  84. {nullptr, nullptr, 0},
  85. };
  86. // List of builtin types from Itanium C++ ABI.
  87. //
  88. // Invariant: only one- or two-character type abbreviations here.
  89. static const AbbrevPair kBuiltinTypeList[] = {
  90. {"v", "void", 0},
  91. {"w", "wchar_t", 0},
  92. {"b", "bool", 0},
  93. {"c", "char", 0},
  94. {"a", "signed char", 0},
  95. {"h", "unsigned char", 0},
  96. {"s", "short", 0},
  97. {"t", "unsigned short", 0},
  98. {"i", "int", 0},
  99. {"j", "unsigned int", 0},
  100. {"l", "long", 0},
  101. {"m", "unsigned long", 0},
  102. {"x", "long long", 0},
  103. {"y", "unsigned long long", 0},
  104. {"n", "__int128", 0},
  105. {"o", "unsigned __int128", 0},
  106. {"f", "float", 0},
  107. {"d", "double", 0},
  108. {"e", "long double", 0},
  109. {"g", "__float128", 0},
  110. {"z", "ellipsis", 0},
  111. {"De", "decimal128", 0}, // IEEE 754r decimal floating point (128 bits)
  112. {"Dd", "decimal64", 0}, // IEEE 754r decimal floating point (64 bits)
  113. {"Dc", "decltype(auto)", 0},
  114. {"Da", "auto", 0},
  115. {"Dn", "std::nullptr_t", 0}, // i.e., decltype(nullptr)
  116. {"Df", "decimal32", 0}, // IEEE 754r decimal floating point (32 bits)
  117. {"Di", "char32_t", 0},
  118. {"Du", "char8_t", 0},
  119. {"Ds", "char16_t", 0},
  120. {"Dh", "float16", 0}, // IEEE 754r half-precision float (16 bits)
  121. {nullptr, nullptr, 0},
  122. };
  123. // List of substitutions Itanium C++ ABI.
  124. static const AbbrevPair kSubstitutionList[] = {
  125. {"St", "", 0},
  126. {"Sa", "allocator", 0},
  127. {"Sb", "basic_string", 0},
  128. // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
  129. {"Ss", "string", 0},
  130. // std::basic_istream<char, std::char_traits<char> >
  131. {"Si", "istream", 0},
  132. // std::basic_ostream<char, std::char_traits<char> >
  133. {"So", "ostream", 0},
  134. // std::basic_iostream<char, std::char_traits<char> >
  135. {"Sd", "iostream", 0},
  136. {nullptr, nullptr, 0},
  137. };
  138. // State needed for demangling. This struct is copied in almost every stack
  139. // frame, so every byte counts.
  140. typedef struct {
  141. int mangled_idx; // Cursor of mangled name.
  142. int out_cur_idx; // Cursor of output string.
  143. int prev_name_idx; // For constructors/destructors.
  144. unsigned int prev_name_length : 16; // For constructors/destructors.
  145. signed int nest_level : 15; // For nested names.
  146. unsigned int append : 1; // Append flag.
  147. // Note: for some reason MSVC can't pack "bool append : 1" into the same int
  148. // with the above two fields, so we use an int instead. Amusingly it can pack
  149. // "signed bool" as expected, but relying on that to continue to be a legal
  150. // type seems ill-advised (as it's illegal in at least clang).
  151. } ParseState;
  152. static_assert(sizeof(ParseState) == 4 * sizeof(int),
  153. "unexpected size of ParseState");
  154. // One-off state for demangling that's not subject to backtracking -- either
  155. // constant data, data that's intentionally immune to backtracking (steps), or
  156. // data that would never be changed by backtracking anyway (recursion_depth).
  157. //
  158. // Only one copy of this exists for each call to Demangle, so the size of this
  159. // struct is nearly inconsequential.
  160. typedef struct {
  161. const char *mangled_begin; // Beginning of input string.
  162. char *out; // Beginning of output string.
  163. int out_end_idx; // One past last allowed output character.
  164. int recursion_depth; // For stack exhaustion prevention.
  165. int steps; // Cap how much work we'll do, regardless of depth.
  166. ParseState parse_state; // Backtrackable state copied for most frames.
  167. } State;
  168. namespace {
  169. // Prevent deep recursion / stack exhaustion.
  170. // Also prevent unbounded handling of complex inputs.
  171. class ComplexityGuard {
  172. public:
  173. explicit ComplexityGuard(State *state) : state_(state) {
  174. ++state->recursion_depth;
  175. ++state->steps;
  176. }
  177. ~ComplexityGuard() { --state_->recursion_depth; }
  178. // 256 levels of recursion seems like a reasonable upper limit on depth.
  179. // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
  180. // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
  181. static constexpr int kRecursionDepthLimit = 256;
  182. // We're trying to pick a charitable upper-limit on how many parse steps are
  183. // necessary to handle something that a human could actually make use of.
  184. // This is mostly in place as a bound on how much work we'll do if we are
  185. // asked to demangle an mangled name from an untrusted source, so it should be
  186. // much larger than the largest expected symbol, but much smaller than the
  187. // amount of work we can do in, e.g., a second.
  188. //
  189. // Some real-world symbols from an arbitrary binary started failing between
  190. // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
  191. // the limit.
  192. //
  193. // Spending one second on 2^17 parse steps would require each step to take
  194. // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
  195. // under a second.
  196. static constexpr int kParseStepsLimit = 1 << 17;
  197. bool IsTooComplex() const {
  198. return state_->recursion_depth > kRecursionDepthLimit ||
  199. state_->steps > kParseStepsLimit;
  200. }
  201. private:
  202. State *state_;
  203. };
  204. } // namespace
  205. // We don't use strlen() in libc since it's not guaranteed to be async
  206. // signal safe.
  207. static size_t StrLen(const char *str) {
  208. size_t len = 0;
  209. while (*str != '\0') {
  210. ++str;
  211. ++len;
  212. }
  213. return len;
  214. }
  215. // Returns true if "str" has at least "n" characters remaining.
  216. static bool AtLeastNumCharsRemaining(const char *str, size_t n) {
  217. for (size_t i = 0; i < n; ++i) {
  218. if (str[i] == '\0') {
  219. return false;
  220. }
  221. }
  222. return true;
  223. }
  224. // Returns true if "str" has "prefix" as a prefix.
  225. static bool StrPrefix(const char *str, const char *prefix) {
  226. size_t i = 0;
  227. while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) {
  228. ++i;
  229. }
  230. return prefix[i] == '\0'; // Consumed everything in "prefix".
  231. }
  232. static void InitState(State* state,
  233. const char* mangled,
  234. char* out,
  235. size_t out_size) {
  236. state->mangled_begin = mangled;
  237. state->out = out;
  238. state->out_end_idx = static_cast<int>(out_size);
  239. state->recursion_depth = 0;
  240. state->steps = 0;
  241. state->parse_state.mangled_idx = 0;
  242. state->parse_state.out_cur_idx = 0;
  243. state->parse_state.prev_name_idx = 0;
  244. state->parse_state.prev_name_length = 0;
  245. state->parse_state.nest_level = -1;
  246. state->parse_state.append = true;
  247. }
  248. static inline const char *RemainingInput(State *state) {
  249. return &state->mangled_begin[state->parse_state.mangled_idx];
  250. }
  251. // Returns true and advances "mangled_idx" if we find "one_char_token"
  252. // at "mangled_idx" position. It is assumed that "one_char_token" does
  253. // not contain '\0'.
  254. static bool ParseOneCharToken(State *state, const char one_char_token) {
  255. ComplexityGuard guard(state);
  256. if (guard.IsTooComplex()) return false;
  257. if (RemainingInput(state)[0] == one_char_token) {
  258. ++state->parse_state.mangled_idx;
  259. return true;
  260. }
  261. return false;
  262. }
  263. // Returns true and advances "mangled_cur" if we find "two_char_token"
  264. // at "mangled_cur" position. It is assumed that "two_char_token" does
  265. // not contain '\0'.
  266. static bool ParseTwoCharToken(State *state, const char *two_char_token) {
  267. ComplexityGuard guard(state);
  268. if (guard.IsTooComplex()) return false;
  269. if (RemainingInput(state)[0] == two_char_token[0] &&
  270. RemainingInput(state)[1] == two_char_token[1]) {
  271. state->parse_state.mangled_idx += 2;
  272. return true;
  273. }
  274. return false;
  275. }
  276. // Returns true and advances "mangled_cur" if we find any character in
  277. // "char_class" at "mangled_cur" position.
  278. static bool ParseCharClass(State *state, const char *char_class) {
  279. ComplexityGuard guard(state);
  280. if (guard.IsTooComplex()) return false;
  281. if (RemainingInput(state)[0] == '\0') {
  282. return false;
  283. }
  284. const char *p = char_class;
  285. for (; *p != '\0'; ++p) {
  286. if (RemainingInput(state)[0] == *p) {
  287. ++state->parse_state.mangled_idx;
  288. return true;
  289. }
  290. }
  291. return false;
  292. }
  293. static bool ParseDigit(State *state, int *digit) {
  294. char c = RemainingInput(state)[0];
  295. if (ParseCharClass(state, "0123456789")) {
  296. if (digit != nullptr) {
  297. *digit = c - '0';
  298. }
  299. return true;
  300. }
  301. return false;
  302. }
  303. // This function is used for handling an optional non-terminal.
  304. static bool Optional(bool /*status*/) { return true; }
  305. // This function is used for handling <non-terminal>+ syntax.
  306. typedef bool (*ParseFunc)(State *);
  307. static bool OneOrMore(ParseFunc parse_func, State *state) {
  308. if (parse_func(state)) {
  309. while (parse_func(state)) {
  310. }
  311. return true;
  312. }
  313. return false;
  314. }
  315. // This function is used for handling <non-terminal>* syntax. The function
  316. // always returns true and must be followed by a termination token or a
  317. // terminating sequence not handled by parse_func (e.g.
  318. // ParseOneCharToken(state, 'E')).
  319. static bool ZeroOrMore(ParseFunc parse_func, State *state) {
  320. while (parse_func(state)) {
  321. }
  322. return true;
  323. }
  324. // Append "str" at "out_cur_idx". If there is an overflow, out_cur_idx is
  325. // set to out_end_idx+1. The output string is ensured to
  326. // always terminate with '\0' as long as there is no overflow.
  327. static void Append(State *state, const char *const str, const size_t length) {
  328. for (size_t i = 0; i < length; ++i) {
  329. if (state->parse_state.out_cur_idx + 1 <
  330. state->out_end_idx) { // +1 for '\0'
  331. state->out[state->parse_state.out_cur_idx++] = str[i];
  332. } else {
  333. // signal overflow
  334. state->parse_state.out_cur_idx = state->out_end_idx + 1;
  335. break;
  336. }
  337. }
  338. if (state->parse_state.out_cur_idx < state->out_end_idx) {
  339. state->out[state->parse_state.out_cur_idx] =
  340. '\0'; // Terminate it with '\0'
  341. }
  342. }
  343. // We don't use equivalents in libc to avoid locale issues.
  344. static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
  345. static bool IsAlpha(char c) {
  346. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  347. }
  348. static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
  349. // Returns true if "str" is a function clone suffix. These suffixes are used
  350. // by GCC 4.5.x and later versions (and our locally-modified version of GCC
  351. // 4.4.x) to indicate functions which have been cloned during optimization.
  352. // We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix.
  353. // Additionally, '_' is allowed along with the alphanumeric sequence.
  354. static bool IsFunctionCloneSuffix(const char *str) {
  355. size_t i = 0;
  356. while (str[i] != '\0') {
  357. bool parsed = false;
  358. // Consume a single [.<alpha> | _]*[.<digit>]* sequence.
  359. if (str[i] == '.' && (IsAlpha(str[i + 1]) || str[i + 1] == '_')) {
  360. parsed = true;
  361. i += 2;
  362. while (IsAlpha(str[i]) || str[i] == '_') {
  363. ++i;
  364. }
  365. }
  366. if (str[i] == '.' && IsDigit(str[i + 1])) {
  367. parsed = true;
  368. i += 2;
  369. while (IsDigit(str[i])) {
  370. ++i;
  371. }
  372. }
  373. if (!parsed)
  374. return false;
  375. }
  376. return true; // Consumed everything in "str".
  377. }
  378. static bool EndsWith(State *state, const char chr) {
  379. return state->parse_state.out_cur_idx > 0 &&
  380. state->parse_state.out_cur_idx < state->out_end_idx &&
  381. chr == state->out[state->parse_state.out_cur_idx - 1];
  382. }
  383. // Append "str" with some tweaks, iff "append" state is true.
  384. static void MaybeAppendWithLength(State *state, const char *const str,
  385. const size_t length) {
  386. if (state->parse_state.append && length > 0) {
  387. // Append a space if the output buffer ends with '<' and "str"
  388. // starts with '<' to avoid <<<.
  389. if (str[0] == '<' && EndsWith(state, '<')) {
  390. Append(state, " ", 1);
  391. }
  392. // Remember the last identifier name for ctors/dtors,
  393. // but only if we haven't yet overflown the buffer.
  394. if (state->parse_state.out_cur_idx < state->out_end_idx &&
  395. (IsAlpha(str[0]) || str[0] == '_')) {
  396. state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
  397. state->parse_state.prev_name_length = static_cast<unsigned int>(length);
  398. }
  399. Append(state, str, length);
  400. }
  401. }
  402. // Appends a positive decimal number to the output if appending is enabled.
  403. static bool MaybeAppendDecimal(State *state, int val) {
  404. // Max {32-64}-bit unsigned int is 20 digits.
  405. constexpr size_t kMaxLength = 20;
  406. char buf[kMaxLength];
  407. // We can't use itoa or sprintf as neither is specified to be
  408. // async-signal-safe.
  409. if (state->parse_state.append) {
  410. // We can't have a one-before-the-beginning pointer, so instead start with
  411. // one-past-the-end and manipulate one character before the pointer.
  412. char *p = &buf[kMaxLength];
  413. do { // val=0 is the only input that should write a leading zero digit.
  414. *--p = static_cast<char>((val % 10) + '0');
  415. val /= 10;
  416. } while (p > buf && val != 0);
  417. // 'p' landed on the last character we set. How convenient.
  418. Append(state, p, kMaxLength - static_cast<size_t>(p - buf));
  419. }
  420. return true;
  421. }
  422. // A convenient wrapper around MaybeAppendWithLength().
  423. // Returns true so that it can be placed in "if" conditions.
  424. static bool MaybeAppend(State *state, const char *const str) {
  425. if (state->parse_state.append) {
  426. size_t length = StrLen(str);
  427. MaybeAppendWithLength(state, str, length);
  428. }
  429. return true;
  430. }
  431. // This function is used for handling nested names.
  432. static bool EnterNestedName(State *state) {
  433. state->parse_state.nest_level = 0;
  434. return true;
  435. }
  436. // This function is used for handling nested names.
  437. static bool LeaveNestedName(State *state, int16_t prev_value) {
  438. state->parse_state.nest_level = prev_value;
  439. return true;
  440. }
  441. // Disable the append mode not to print function parameters, etc.
  442. static bool DisableAppend(State *state) {
  443. state->parse_state.append = false;
  444. return true;
  445. }
  446. // Restore the append mode to the previous state.
  447. static bool RestoreAppend(State *state, bool prev_value) {
  448. state->parse_state.append = prev_value;
  449. return true;
  450. }
  451. // Increase the nest level for nested names.
  452. static void MaybeIncreaseNestLevel(State *state) {
  453. if (state->parse_state.nest_level > -1) {
  454. ++state->parse_state.nest_level;
  455. }
  456. }
  457. // Appends :: for nested names if necessary.
  458. static void MaybeAppendSeparator(State *state) {
  459. if (state->parse_state.nest_level >= 1) {
  460. MaybeAppend(state, "::");
  461. }
  462. }
  463. // Cancel the last separator if necessary.
  464. static void MaybeCancelLastSeparator(State *state) {
  465. if (state->parse_state.nest_level >= 1 && state->parse_state.append &&
  466. state->parse_state.out_cur_idx >= 2) {
  467. state->parse_state.out_cur_idx -= 2;
  468. state->out[state->parse_state.out_cur_idx] = '\0';
  469. }
  470. }
  471. // Returns true if the identifier of the given length pointed to by
  472. // "mangled_cur" is anonymous namespace.
  473. static bool IdentifierIsAnonymousNamespace(State *state, size_t length) {
  474. // Returns true if "anon_prefix" is a proper prefix of "mangled_cur".
  475. static const char anon_prefix[] = "_GLOBAL__N_";
  476. return (length > (sizeof(anon_prefix) - 1) &&
  477. StrPrefix(RemainingInput(state), anon_prefix));
  478. }
  479. // Forward declarations of our parsing functions.
  480. static bool ParseMangledName(State *state);
  481. static bool ParseEncoding(State *state);
  482. static bool ParseName(State *state);
  483. static bool ParseUnscopedName(State *state);
  484. static bool ParseNestedName(State *state);
  485. static bool ParsePrefix(State *state);
  486. static bool ParseUnqualifiedName(State *state);
  487. static bool ParseSourceName(State *state);
  488. static bool ParseLocalSourceName(State *state);
  489. static bool ParseUnnamedTypeName(State *state);
  490. static bool ParseNumber(State *state, int *number_out);
  491. static bool ParseFloatNumber(State *state);
  492. static bool ParseSeqId(State *state);
  493. static bool ParseIdentifier(State *state, size_t length);
  494. static bool ParseOperatorName(State *state, int *arity);
  495. static bool ParseSpecialName(State *state);
  496. static bool ParseCallOffset(State *state);
  497. static bool ParseNVOffset(State *state);
  498. static bool ParseVOffset(State *state);
  499. static bool ParseAbiTags(State *state);
  500. static bool ParseCtorDtorName(State *state);
  501. static bool ParseDecltype(State *state);
  502. static bool ParseType(State *state);
  503. static bool ParseCVQualifiers(State *state);
  504. static bool ParseBuiltinType(State *state);
  505. static bool ParseFunctionType(State *state);
  506. static bool ParseBareFunctionType(State *state);
  507. static bool ParseClassEnumType(State *state);
  508. static bool ParseArrayType(State *state);
  509. static bool ParsePointerToMemberType(State *state);
  510. static bool ParseTemplateParam(State *state);
  511. static bool ParseTemplateTemplateParam(State *state);
  512. static bool ParseTemplateArgs(State *state);
  513. static bool ParseTemplateArg(State *state);
  514. static bool ParseBaseUnresolvedName(State *state);
  515. static bool ParseUnresolvedName(State *state);
  516. static bool ParseExpression(State *state);
  517. static bool ParseExprPrimary(State *state);
  518. static bool ParseExprCastValue(State *state);
  519. static bool ParseLocalName(State *state);
  520. static bool ParseLocalNameSuffix(State *state);
  521. static bool ParseDiscriminator(State *state);
  522. static bool ParseSubstitution(State *state, bool accept_std);
  523. // Implementation note: the following code is a straightforward
  524. // translation of the Itanium C++ ABI defined in BNF with a couple of
  525. // exceptions.
  526. //
  527. // - Support GNU extensions not defined in the Itanium C++ ABI
  528. // - <prefix> and <template-prefix> are combined to avoid infinite loop
  529. // - Reorder patterns to shorten the code
  530. // - Reorder patterns to give greedier functions precedence
  531. // We'll mark "Less greedy than" for these cases in the code
  532. //
  533. // Each parsing function changes the parse state and returns true on
  534. // success, or returns false and doesn't change the parse state (note:
  535. // the parse-steps counter increases regardless of success or failure).
  536. // To ensure that the parse state isn't changed in the latter case, we
  537. // save the original state before we call multiple parsing functions
  538. // consecutively with &&, and restore it if unsuccessful. See
  539. // ParseEncoding() as an example of this convention. We follow the
  540. // convention throughout the code.
  541. //
  542. // Originally we tried to do demangling without following the full ABI
  543. // syntax but it turned out we needed to follow the full syntax to
  544. // parse complicated cases like nested template arguments. Note that
  545. // implementing a full-fledged demangler isn't trivial (libiberty's
  546. // cp-demangle.c has +4300 lines).
  547. //
  548. // Note that (foo) in <(foo) ...> is a modifier to be ignored.
  549. //
  550. // Reference:
  551. // - Itanium C++ ABI
  552. // <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
  553. // <mangled-name> ::= _Z <encoding>
  554. static bool ParseMangledName(State *state) {
  555. ComplexityGuard guard(state);
  556. if (guard.IsTooComplex()) return false;
  557. return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
  558. }
  559. // <encoding> ::= <(function) name> <bare-function-type>
  560. // ::= <(data) name>
  561. // ::= <special-name>
  562. static bool ParseEncoding(State *state) {
  563. ComplexityGuard guard(state);
  564. if (guard.IsTooComplex()) return false;
  565. // Implementing the first two productions together as <name>
  566. // [<bare-function-type>] avoids exponential blowup of backtracking.
  567. //
  568. // Since Optional(...) can't fail, there's no need to copy the state for
  569. // backtracking.
  570. if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
  571. return true;
  572. }
  573. if (ParseSpecialName(state)) {
  574. return true;
  575. }
  576. return false;
  577. }
  578. // <name> ::= <nested-name>
  579. // ::= <unscoped-template-name> <template-args>
  580. // ::= <unscoped-name>
  581. // ::= <local-name>
  582. static bool ParseName(State *state) {
  583. ComplexityGuard guard(state);
  584. if (guard.IsTooComplex()) return false;
  585. if (ParseNestedName(state) || ParseLocalName(state)) {
  586. return true;
  587. }
  588. // We reorganize the productions to avoid re-parsing unscoped names.
  589. // - Inline <unscoped-template-name> productions:
  590. // <name> ::= <substitution> <template-args>
  591. // ::= <unscoped-name> <template-args>
  592. // ::= <unscoped-name>
  593. // - Merge the two productions that start with unscoped-name:
  594. // <name> ::= <unscoped-name> [<template-args>]
  595. ParseState copy = state->parse_state;
  596. // "std<...>" isn't a valid name.
  597. if (ParseSubstitution(state, /*accept_std=*/false) &&
  598. ParseTemplateArgs(state)) {
  599. return true;
  600. }
  601. state->parse_state = copy;
  602. // Note there's no need to restore state after this since only the first
  603. // subparser can fail.
  604. return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state));
  605. }
  606. // <unscoped-name> ::= <unqualified-name>
  607. // ::= St <unqualified-name>
  608. static bool ParseUnscopedName(State *state) {
  609. ComplexityGuard guard(state);
  610. if (guard.IsTooComplex()) return false;
  611. if (ParseUnqualifiedName(state)) {
  612. return true;
  613. }
  614. ParseState copy = state->parse_state;
  615. if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") &&
  616. ParseUnqualifiedName(state)) {
  617. return true;
  618. }
  619. state->parse_state = copy;
  620. return false;
  621. }
  622. // <ref-qualifer> ::= R // lvalue method reference qualifier
  623. // ::= O // rvalue method reference qualifier
  624. static inline bool ParseRefQualifier(State *state) {
  625. return ParseCharClass(state, "OR");
  626. }
  627. // <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
  628. // <unqualified-name> E
  629. // ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
  630. // <template-args> E
  631. static bool ParseNestedName(State *state) {
  632. ComplexityGuard guard(state);
  633. if (guard.IsTooComplex()) return false;
  634. ParseState copy = state->parse_state;
  635. if (ParseOneCharToken(state, 'N') && EnterNestedName(state) &&
  636. Optional(ParseCVQualifiers(state)) &&
  637. Optional(ParseRefQualifier(state)) && ParsePrefix(state) &&
  638. LeaveNestedName(state, copy.nest_level) &&
  639. ParseOneCharToken(state, 'E')) {
  640. return true;
  641. }
  642. state->parse_state = copy;
  643. return false;
  644. }
  645. // This part is tricky. If we literally translate them to code, we'll
  646. // end up infinite loop. Hence we merge them to avoid the case.
  647. //
  648. // <prefix> ::= <prefix> <unqualified-name>
  649. // ::= <template-prefix> <template-args>
  650. // ::= <template-param>
  651. // ::= <substitution>
  652. // ::= # empty
  653. // <template-prefix> ::= <prefix> <(template) unqualified-name>
  654. // ::= <template-param>
  655. // ::= <substitution>
  656. static bool ParsePrefix(State *state) {
  657. ComplexityGuard guard(state);
  658. if (guard.IsTooComplex()) return false;
  659. bool has_something = false;
  660. while (true) {
  661. MaybeAppendSeparator(state);
  662. if (ParseTemplateParam(state) ||
  663. ParseSubstitution(state, /*accept_std=*/true) ||
  664. ParseUnscopedName(state) ||
  665. (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
  666. has_something = true;
  667. MaybeIncreaseNestLevel(state);
  668. continue;
  669. }
  670. MaybeCancelLastSeparator(state);
  671. if (has_something && ParseTemplateArgs(state)) {
  672. return ParsePrefix(state);
  673. } else {
  674. break;
  675. }
  676. }
  677. return true;
  678. }
  679. // <unqualified-name> ::= <operator-name> [<abi-tags>]
  680. // ::= <ctor-dtor-name> [<abi-tags>]
  681. // ::= <source-name> [<abi-tags>]
  682. // ::= <local-source-name> [<abi-tags>]
  683. // ::= <unnamed-type-name> [<abi-tags>]
  684. //
  685. // <local-source-name> is a GCC extension; see below.
  686. static bool ParseUnqualifiedName(State *state) {
  687. ComplexityGuard guard(state);
  688. if (guard.IsTooComplex()) return false;
  689. if (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) ||
  690. ParseSourceName(state) || ParseLocalSourceName(state) ||
  691. ParseUnnamedTypeName(state)) {
  692. return ParseAbiTags(state);
  693. }
  694. return false;
  695. }
  696. // <abi-tags> ::= <abi-tag> [<abi-tags>]
  697. // <abi-tag> ::= B <source-name>
  698. static bool ParseAbiTags(State *state) {
  699. ComplexityGuard guard(state);
  700. if (guard.IsTooComplex()) return false;
  701. while (ParseOneCharToken(state, 'B')) {
  702. ParseState copy = state->parse_state;
  703. MaybeAppend(state, "[abi:");
  704. if (!ParseSourceName(state)) {
  705. state->parse_state = copy;
  706. return false;
  707. }
  708. MaybeAppend(state, "]");
  709. }
  710. return true;
  711. }
  712. // <source-name> ::= <positive length number> <identifier>
  713. static bool ParseSourceName(State *state) {
  714. ComplexityGuard guard(state);
  715. if (guard.IsTooComplex()) return false;
  716. ParseState copy = state->parse_state;
  717. int length = -1;
  718. if (ParseNumber(state, &length) &&
  719. ParseIdentifier(state, static_cast<size_t>(length))) {
  720. return true;
  721. }
  722. state->parse_state = copy;
  723. return false;
  724. }
  725. // <local-source-name> ::= L <source-name> [<discriminator>]
  726. //
  727. // References:
  728. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
  729. // https://gcc.gnu.org/viewcvs?view=rev&revision=124467
  730. static bool ParseLocalSourceName(State *state) {
  731. ComplexityGuard guard(state);
  732. if (guard.IsTooComplex()) return false;
  733. ParseState copy = state->parse_state;
  734. if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
  735. Optional(ParseDiscriminator(state))) {
  736. return true;
  737. }
  738. state->parse_state = copy;
  739. return false;
  740. }
  741. // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
  742. // ::= <closure-type-name>
  743. // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
  744. // <lambda-sig> ::= <(parameter) type>+
  745. static bool ParseUnnamedTypeName(State *state) {
  746. ComplexityGuard guard(state);
  747. if (guard.IsTooComplex()) return false;
  748. ParseState copy = state->parse_state;
  749. // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
  750. // Optionally parse the encoded value into 'which' and add 2 to get the index.
  751. int which = -1;
  752. // Unnamed type local to function or class.
  753. if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
  754. which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
  755. ParseOneCharToken(state, '_')) {
  756. MaybeAppend(state, "{unnamed type#");
  757. MaybeAppendDecimal(state, 2 + which);
  758. MaybeAppend(state, "}");
  759. return true;
  760. }
  761. state->parse_state = copy;
  762. // Closure type.
  763. which = -1;
  764. if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
  765. OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
  766. ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
  767. which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
  768. ParseOneCharToken(state, '_')) {
  769. MaybeAppend(state, "{lambda()#");
  770. MaybeAppendDecimal(state, 2 + which);
  771. MaybeAppend(state, "}");
  772. return true;
  773. }
  774. state->parse_state = copy;
  775. return false;
  776. }
  777. // <number> ::= [n] <non-negative decimal integer>
  778. // If "number_out" is non-null, then *number_out is set to the value of the
  779. // parsed number on success.
  780. static bool ParseNumber(State *state, int *number_out) {
  781. ComplexityGuard guard(state);
  782. if (guard.IsTooComplex()) return false;
  783. bool negative = false;
  784. if (ParseOneCharToken(state, 'n')) {
  785. negative = true;
  786. }
  787. const char *p = RemainingInput(state);
  788. uint64_t number = 0;
  789. for (; *p != '\0'; ++p) {
  790. if (IsDigit(*p)) {
  791. number = number * 10 + static_cast<uint64_t>(*p - '0');
  792. } else {
  793. break;
  794. }
  795. }
  796. // Apply the sign with uint64_t arithmetic so overflows aren't UB. Gives
  797. // "incorrect" results for out-of-range inputs, but negative values only
  798. // appear for literals, which aren't printed.
  799. if (negative) {
  800. number = ~number + 1;
  801. }
  802. if (p != RemainingInput(state)) { // Conversion succeeded.
  803. state->parse_state.mangled_idx += p - RemainingInput(state);
  804. if (number_out != nullptr) {
  805. // Note: possibly truncate "number".
  806. *number_out = static_cast<int>(number);
  807. }
  808. return true;
  809. }
  810. return false;
  811. }
  812. // Floating-point literals are encoded using a fixed-length lowercase
  813. // hexadecimal string.
  814. static bool ParseFloatNumber(State *state) {
  815. ComplexityGuard guard(state);
  816. if (guard.IsTooComplex()) return false;
  817. const char *p = RemainingInput(state);
  818. for (; *p != '\0'; ++p) {
  819. if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
  820. break;
  821. }
  822. }
  823. if (p != RemainingInput(state)) { // Conversion succeeded.
  824. state->parse_state.mangled_idx += p - RemainingInput(state);
  825. return true;
  826. }
  827. return false;
  828. }
  829. // The <seq-id> is a sequence number in base 36,
  830. // using digits and upper case letters
  831. static bool ParseSeqId(State *state) {
  832. ComplexityGuard guard(state);
  833. if (guard.IsTooComplex()) return false;
  834. const char *p = RemainingInput(state);
  835. for (; *p != '\0'; ++p) {
  836. if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
  837. break;
  838. }
  839. }
  840. if (p != RemainingInput(state)) { // Conversion succeeded.
  841. state->parse_state.mangled_idx += p - RemainingInput(state);
  842. return true;
  843. }
  844. return false;
  845. }
  846. // <identifier> ::= <unqualified source code identifier> (of given length)
  847. static bool ParseIdentifier(State *state, size_t length) {
  848. ComplexityGuard guard(state);
  849. if (guard.IsTooComplex()) return false;
  850. if (!AtLeastNumCharsRemaining(RemainingInput(state), length)) {
  851. return false;
  852. }
  853. if (IdentifierIsAnonymousNamespace(state, length)) {
  854. MaybeAppend(state, "(anonymous namespace)");
  855. } else {
  856. MaybeAppendWithLength(state, RemainingInput(state), length);
  857. }
  858. state->parse_state.mangled_idx += length;
  859. return true;
  860. }
  861. // <operator-name> ::= nw, and other two letters cases
  862. // ::= cv <type> # (cast)
  863. // ::= v <digit> <source-name> # vendor extended operator
  864. static bool ParseOperatorName(State *state, int *arity) {
  865. ComplexityGuard guard(state);
  866. if (guard.IsTooComplex()) return false;
  867. if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
  868. return false;
  869. }
  870. // First check with "cv" (cast) case.
  871. ParseState copy = state->parse_state;
  872. if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
  873. EnterNestedName(state) && ParseType(state) &&
  874. LeaveNestedName(state, copy.nest_level)) {
  875. if (arity != nullptr) {
  876. *arity = 1;
  877. }
  878. return true;
  879. }
  880. state->parse_state = copy;
  881. // Then vendor extended operators.
  882. if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
  883. ParseSourceName(state)) {
  884. return true;
  885. }
  886. state->parse_state = copy;
  887. // Other operator names should start with a lower alphabet followed
  888. // by a lower/upper alphabet.
  889. if (!(IsLower(RemainingInput(state)[0]) &&
  890. IsAlpha(RemainingInput(state)[1]))) {
  891. return false;
  892. }
  893. // We may want to perform a binary search if we really need speed.
  894. const AbbrevPair *p;
  895. for (p = kOperatorList; p->abbrev != nullptr; ++p) {
  896. if (RemainingInput(state)[0] == p->abbrev[0] &&
  897. RemainingInput(state)[1] == p->abbrev[1]) {
  898. if (arity != nullptr) {
  899. *arity = p->arity;
  900. }
  901. MaybeAppend(state, "operator");
  902. if (IsLower(*p->real_name)) { // new, delete, etc.
  903. MaybeAppend(state, " ");
  904. }
  905. MaybeAppend(state, p->real_name);
  906. state->parse_state.mangled_idx += 2;
  907. return true;
  908. }
  909. }
  910. return false;
  911. }
  912. // <special-name> ::= TV <type>
  913. // ::= TT <type>
  914. // ::= TI <type>
  915. // ::= TS <type>
  916. // ::= TH <type> # thread-local
  917. // ::= Tc <call-offset> <call-offset> <(base) encoding>
  918. // ::= GV <(object) name>
  919. // ::= T <call-offset> <(base) encoding>
  920. // G++ extensions:
  921. // ::= TC <type> <(offset) number> _ <(base) type>
  922. // ::= TF <type>
  923. // ::= TJ <type>
  924. // ::= GR <name>
  925. // ::= GA <encoding>
  926. // ::= Th <call-offset> <(base) encoding>
  927. // ::= Tv <call-offset> <(base) encoding>
  928. //
  929. // Note: we don't care much about them since they don't appear in
  930. // stack traces. The are special data.
  931. static bool ParseSpecialName(State *state) {
  932. ComplexityGuard guard(state);
  933. if (guard.IsTooComplex()) return false;
  934. ParseState copy = state->parse_state;
  935. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
  936. ParseType(state)) {
  937. return true;
  938. }
  939. state->parse_state = copy;
  940. if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
  941. ParseCallOffset(state) && ParseEncoding(state)) {
  942. return true;
  943. }
  944. state->parse_state = copy;
  945. if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
  946. return true;
  947. }
  948. state->parse_state = copy;
  949. if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
  950. ParseEncoding(state)) {
  951. return true;
  952. }
  953. state->parse_state = copy;
  954. // G++ extensions
  955. if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
  956. ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
  957. DisableAppend(state) && ParseType(state)) {
  958. RestoreAppend(state, copy.append);
  959. return true;
  960. }
  961. state->parse_state = copy;
  962. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
  963. ParseType(state)) {
  964. return true;
  965. }
  966. state->parse_state = copy;
  967. if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
  968. return true;
  969. }
  970. state->parse_state = copy;
  971. if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
  972. return true;
  973. }
  974. state->parse_state = copy;
  975. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
  976. ParseCallOffset(state) && ParseEncoding(state)) {
  977. return true;
  978. }
  979. state->parse_state = copy;
  980. return false;
  981. }
  982. // <call-offset> ::= h <nv-offset> _
  983. // ::= v <v-offset> _
  984. static bool ParseCallOffset(State *state) {
  985. ComplexityGuard guard(state);
  986. if (guard.IsTooComplex()) return false;
  987. ParseState copy = state->parse_state;
  988. if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
  989. ParseOneCharToken(state, '_')) {
  990. return true;
  991. }
  992. state->parse_state = copy;
  993. if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
  994. ParseOneCharToken(state, '_')) {
  995. return true;
  996. }
  997. state->parse_state = copy;
  998. return false;
  999. }
  1000. // <nv-offset> ::= <(offset) number>
  1001. static bool ParseNVOffset(State *state) {
  1002. ComplexityGuard guard(state);
  1003. if (guard.IsTooComplex()) return false;
  1004. return ParseNumber(state, nullptr);
  1005. }
  1006. // <v-offset> ::= <(offset) number> _ <(virtual offset) number>
  1007. static bool ParseVOffset(State *state) {
  1008. ComplexityGuard guard(state);
  1009. if (guard.IsTooComplex()) return false;
  1010. ParseState copy = state->parse_state;
  1011. if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
  1012. ParseNumber(state, nullptr)) {
  1013. return true;
  1014. }
  1015. state->parse_state = copy;
  1016. return false;
  1017. }
  1018. // <ctor-dtor-name> ::= C1 | C2 | C3 | CI1 <base-class-type> | CI2
  1019. // <base-class-type>
  1020. // ::= D0 | D1 | D2
  1021. // # GCC extensions: "unified" constructor/destructor. See
  1022. // #
  1023. // https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
  1024. // ::= C4 | D4
  1025. static bool ParseCtorDtorName(State *state) {
  1026. ComplexityGuard guard(state);
  1027. if (guard.IsTooComplex()) return false;
  1028. ParseState copy = state->parse_state;
  1029. if (ParseOneCharToken(state, 'C')) {
  1030. if (ParseCharClass(state, "1234")) {
  1031. const char *const prev_name =
  1032. state->out + state->parse_state.prev_name_idx;
  1033. MaybeAppendWithLength(state, prev_name,
  1034. state->parse_state.prev_name_length);
  1035. return true;
  1036. } else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") &&
  1037. ParseClassEnumType(state)) {
  1038. return true;
  1039. }
  1040. }
  1041. state->parse_state = copy;
  1042. if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
  1043. const char *const prev_name = state->out + state->parse_state.prev_name_idx;
  1044. MaybeAppend(state, "~");
  1045. MaybeAppendWithLength(state, prev_name,
  1046. state->parse_state.prev_name_length);
  1047. return true;
  1048. }
  1049. state->parse_state = copy;
  1050. return false;
  1051. }
  1052. // <decltype> ::= Dt <expression> E # decltype of an id-expression or class
  1053. // # member access (C++0x)
  1054. // ::= DT <expression> E # decltype of an expression (C++0x)
  1055. static bool ParseDecltype(State *state) {
  1056. ComplexityGuard guard(state);
  1057. if (guard.IsTooComplex()) return false;
  1058. ParseState copy = state->parse_state;
  1059. if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
  1060. ParseExpression(state) && ParseOneCharToken(state, 'E')) {
  1061. return true;
  1062. }
  1063. state->parse_state = copy;
  1064. return false;
  1065. }
  1066. // <type> ::= <CV-qualifiers> <type>
  1067. // ::= P <type> # pointer-to
  1068. // ::= R <type> # reference-to
  1069. // ::= O <type> # rvalue reference-to (C++0x)
  1070. // ::= C <type> # complex pair (C 2000)
  1071. // ::= G <type> # imaginary (C 2000)
  1072. // ::= U <source-name> <type> # vendor extended type qualifier
  1073. // ::= <builtin-type>
  1074. // ::= <function-type>
  1075. // ::= <class-enum-type> # note: just an alias for <name>
  1076. // ::= <array-type>
  1077. // ::= <pointer-to-member-type>
  1078. // ::= <template-template-param> <template-args>
  1079. // ::= <template-param>
  1080. // ::= <decltype>
  1081. // ::= <substitution>
  1082. // ::= Dp <type> # pack expansion of (C++0x)
  1083. // ::= Dv <num-elems> _ # GNU vector extension
  1084. //
  1085. static bool ParseType(State *state) {
  1086. ComplexityGuard guard(state);
  1087. if (guard.IsTooComplex()) return false;
  1088. ParseState copy = state->parse_state;
  1089. // We should check CV-qualifers, and PRGC things first.
  1090. //
  1091. // CV-qualifiers overlap with some operator names, but an operator name is not
  1092. // valid as a type. To avoid an ambiguity that can lead to exponential time
  1093. // complexity, refuse to backtrack the CV-qualifiers.
  1094. //
  1095. // _Z4aoeuIrMvvE
  1096. // => _Z 4aoeuI rM v v E
  1097. // aoeu<operator%=, void, void>
  1098. // => _Z 4aoeuI r Mv v E
  1099. // aoeu<void void::* restrict>
  1100. //
  1101. // By consuming the CV-qualifiers first, the former parse is disabled.
  1102. if (ParseCVQualifiers(state)) {
  1103. const bool result = ParseType(state);
  1104. if (!result) state->parse_state = copy;
  1105. return result;
  1106. }
  1107. state->parse_state = copy;
  1108. // Similarly, these tag characters can overlap with other <name>s resulting in
  1109. // two different parse prefixes that land on <template-args> in the same
  1110. // place, such as "C3r1xI...". So, disable the "ctor-name = C3" parse by
  1111. // refusing to backtrack the tag characters.
  1112. if (ParseCharClass(state, "OPRCG")) {
  1113. const bool result = ParseType(state);
  1114. if (!result) state->parse_state = copy;
  1115. return result;
  1116. }
  1117. state->parse_state = copy;
  1118. if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
  1119. return true;
  1120. }
  1121. state->parse_state = copy;
  1122. if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
  1123. ParseType(state)) {
  1124. return true;
  1125. }
  1126. state->parse_state = copy;
  1127. if (ParseBuiltinType(state) || ParseFunctionType(state) ||
  1128. ParseClassEnumType(state) || ParseArrayType(state) ||
  1129. ParsePointerToMemberType(state) || ParseDecltype(state) ||
  1130. // "std" on its own isn't a type.
  1131. ParseSubstitution(state, /*accept_std=*/false)) {
  1132. return true;
  1133. }
  1134. if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
  1135. return true;
  1136. }
  1137. state->parse_state = copy;
  1138. // Less greedy than <template-template-param> <template-args>.
  1139. if (ParseTemplateParam(state)) {
  1140. return true;
  1141. }
  1142. if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
  1143. ParseOneCharToken(state, '_')) {
  1144. return true;
  1145. }
  1146. state->parse_state = copy;
  1147. return false;
  1148. }
  1149. // <CV-qualifiers> ::= [r] [V] [K]
  1150. // We don't allow empty <CV-qualifiers> to avoid infinite loop in
  1151. // ParseType().
  1152. static bool ParseCVQualifiers(State *state) {
  1153. ComplexityGuard guard(state);
  1154. if (guard.IsTooComplex()) return false;
  1155. int num_cv_qualifiers = 0;
  1156. num_cv_qualifiers += ParseOneCharToken(state, 'r');
  1157. num_cv_qualifiers += ParseOneCharToken(state, 'V');
  1158. num_cv_qualifiers += ParseOneCharToken(state, 'K');
  1159. return num_cv_qualifiers > 0;
  1160. }
  1161. // <builtin-type> ::= v, etc. # single-character builtin types
  1162. // ::= u <source-name>
  1163. // ::= Dd, etc. # two-character builtin types
  1164. //
  1165. // Not supported:
  1166. // ::= DF <number> _ # _FloatN (N bits)
  1167. //
  1168. static bool ParseBuiltinType(State *state) {
  1169. ComplexityGuard guard(state);
  1170. if (guard.IsTooComplex()) return false;
  1171. const AbbrevPair *p;
  1172. for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
  1173. // Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
  1174. if (p->abbrev[1] == '\0') {
  1175. if (ParseOneCharToken(state, p->abbrev[0])) {
  1176. MaybeAppend(state, p->real_name);
  1177. return true;
  1178. }
  1179. } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
  1180. MaybeAppend(state, p->real_name);
  1181. return true;
  1182. }
  1183. }
  1184. ParseState copy = state->parse_state;
  1185. if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
  1186. return true;
  1187. }
  1188. state->parse_state = copy;
  1189. return false;
  1190. }
  1191. // <exception-spec> ::= Do # non-throwing
  1192. // exception-specification (e.g.,
  1193. // noexcept, throw())
  1194. // ::= DO <expression> E # computed (instantiation-dependent)
  1195. // noexcept
  1196. // ::= Dw <type>+ E # dynamic exception specification
  1197. // with instantiation-dependent types
  1198. static bool ParseExceptionSpec(State *state) {
  1199. ComplexityGuard guard(state);
  1200. if (guard.IsTooComplex()) return false;
  1201. if (ParseTwoCharToken(state, "Do")) return true;
  1202. ParseState copy = state->parse_state;
  1203. if (ParseTwoCharToken(state, "DO") && ParseExpression(state) &&
  1204. ParseOneCharToken(state, 'E')) {
  1205. return true;
  1206. }
  1207. state->parse_state = copy;
  1208. if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) &&
  1209. ParseOneCharToken(state, 'E')) {
  1210. return true;
  1211. }
  1212. state->parse_state = copy;
  1213. return false;
  1214. }
  1215. // <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
  1216. static bool ParseFunctionType(State *state) {
  1217. ComplexityGuard guard(state);
  1218. if (guard.IsTooComplex()) return false;
  1219. ParseState copy = state->parse_state;
  1220. if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
  1221. Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
  1222. Optional(ParseOneCharToken(state, 'O')) &&
  1223. ParseOneCharToken(state, 'E')) {
  1224. return true;
  1225. }
  1226. state->parse_state = copy;
  1227. return false;
  1228. }
  1229. // <bare-function-type> ::= <(signature) type>+
  1230. static bool ParseBareFunctionType(State *state) {
  1231. ComplexityGuard guard(state);
  1232. if (guard.IsTooComplex()) return false;
  1233. ParseState copy = state->parse_state;
  1234. DisableAppend(state);
  1235. if (OneOrMore(ParseType, state)) {
  1236. RestoreAppend(state, copy.append);
  1237. MaybeAppend(state, "()");
  1238. return true;
  1239. }
  1240. state->parse_state = copy;
  1241. return false;
  1242. }
  1243. // <class-enum-type> ::= <name>
  1244. static bool ParseClassEnumType(State *state) {
  1245. ComplexityGuard guard(state);
  1246. if (guard.IsTooComplex()) return false;
  1247. return ParseName(state);
  1248. }
  1249. // <array-type> ::= A <(positive dimension) number> _ <(element) type>
  1250. // ::= A [<(dimension) expression>] _ <(element) type>
  1251. static bool ParseArrayType(State *state) {
  1252. ComplexityGuard guard(state);
  1253. if (guard.IsTooComplex()) return false;
  1254. ParseState copy = state->parse_state;
  1255. if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
  1256. ParseOneCharToken(state, '_') && ParseType(state)) {
  1257. return true;
  1258. }
  1259. state->parse_state = copy;
  1260. if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
  1261. ParseOneCharToken(state, '_') && ParseType(state)) {
  1262. return true;
  1263. }
  1264. state->parse_state = copy;
  1265. return false;
  1266. }
  1267. // <pointer-to-member-type> ::= M <(class) type> <(member) type>
  1268. static bool ParsePointerToMemberType(State *state) {
  1269. ComplexityGuard guard(state);
  1270. if (guard.IsTooComplex()) return false;
  1271. ParseState copy = state->parse_state;
  1272. if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) {
  1273. return true;
  1274. }
  1275. state->parse_state = copy;
  1276. return false;
  1277. }
  1278. // <template-param> ::= T_
  1279. // ::= T <parameter-2 non-negative number> _
  1280. static bool ParseTemplateParam(State *state) {
  1281. ComplexityGuard guard(state);
  1282. if (guard.IsTooComplex()) return false;
  1283. if (ParseTwoCharToken(state, "T_")) {
  1284. MaybeAppend(state, "?"); // We don't support template substitutions.
  1285. return true;
  1286. }
  1287. ParseState copy = state->parse_state;
  1288. if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
  1289. ParseOneCharToken(state, '_')) {
  1290. MaybeAppend(state, "?"); // We don't support template substitutions.
  1291. return true;
  1292. }
  1293. state->parse_state = copy;
  1294. return false;
  1295. }
  1296. // <template-template-param> ::= <template-param>
  1297. // ::= <substitution>
  1298. static bool ParseTemplateTemplateParam(State *state) {
  1299. ComplexityGuard guard(state);
  1300. if (guard.IsTooComplex()) return false;
  1301. return (ParseTemplateParam(state) ||
  1302. // "std" on its own isn't a template.
  1303. ParseSubstitution(state, /*accept_std=*/false));
  1304. }
  1305. // <template-args> ::= I <template-arg>+ E
  1306. static bool ParseTemplateArgs(State *state) {
  1307. ComplexityGuard guard(state);
  1308. if (guard.IsTooComplex()) return false;
  1309. ParseState copy = state->parse_state;
  1310. DisableAppend(state);
  1311. if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
  1312. ParseOneCharToken(state, 'E')) {
  1313. RestoreAppend(state, copy.append);
  1314. MaybeAppend(state, "<>");
  1315. return true;
  1316. }
  1317. state->parse_state = copy;
  1318. return false;
  1319. }
  1320. // <template-arg> ::= <type>
  1321. // ::= <expr-primary>
  1322. // ::= J <template-arg>* E # argument pack
  1323. // ::= X <expression> E
  1324. static bool ParseTemplateArg(State *state) {
  1325. ComplexityGuard guard(state);
  1326. if (guard.IsTooComplex()) return false;
  1327. ParseState copy = state->parse_state;
  1328. if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
  1329. ParseOneCharToken(state, 'E')) {
  1330. return true;
  1331. }
  1332. state->parse_state = copy;
  1333. // There can be significant overlap between the following leading to
  1334. // exponential backtracking:
  1335. //
  1336. // <expr-primary> ::= L <type> <expr-cast-value> E
  1337. // e.g. L 2xxIvE 1 E
  1338. // <type> ==> <local-source-name> <template-args>
  1339. // e.g. L 2xx IvE
  1340. //
  1341. // This means parsing an entire <type> twice, and <type> can contain
  1342. // <template-arg>, so this can generate exponential backtracking. There is
  1343. // only overlap when the remaining input starts with "L <source-name>", so
  1344. // parse all cases that can start this way jointly to share the common prefix.
  1345. //
  1346. // We have:
  1347. //
  1348. // <template-arg> ::= <type>
  1349. // ::= <expr-primary>
  1350. //
  1351. // First, drop all the productions of <type> that must start with something
  1352. // other than 'L'. All that's left is <class-enum-type>; inline it.
  1353. //
  1354. // <type> ::= <nested-name> # starts with 'N'
  1355. // ::= <unscoped-name>
  1356. // ::= <unscoped-template-name> <template-args>
  1357. // ::= <local-name> # starts with 'Z'
  1358. //
  1359. // Drop and inline again:
  1360. //
  1361. // <type> ::= <unscoped-name>
  1362. // ::= <unscoped-name> <template-args>
  1363. // ::= <substitution> <template-args> # starts with 'S'
  1364. //
  1365. // Merge the first two, inline <unscoped-name>, drop last:
  1366. //
  1367. // <type> ::= <unqualified-name> [<template-args>]
  1368. // ::= St <unqualified-name> [<template-args>] # starts with 'S'
  1369. //
  1370. // Drop and inline:
  1371. //
  1372. // <type> ::= <operator-name> [<template-args>] # starts with lowercase
  1373. // ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
  1374. // ::= <source-name> [<template-args>] # starts with digit
  1375. // ::= <local-source-name> [<template-args>]
  1376. // ::= <unnamed-type-name> [<template-args>] # starts with 'U'
  1377. //
  1378. // One more time:
  1379. //
  1380. // <type> ::= L <source-name> [<template-args>]
  1381. //
  1382. // Likewise with <expr-primary>:
  1383. //
  1384. // <expr-primary> ::= L <type> <expr-cast-value> E
  1385. // ::= LZ <encoding> E # cannot overlap; drop
  1386. // ::= L <mangled_name> E # cannot overlap; drop
  1387. //
  1388. // By similar reasoning as shown above, the only <type>s starting with
  1389. // <source-name> are "<source-name> [<template-args>]". Inline this.
  1390. //
  1391. // <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
  1392. //
  1393. // Now inline both of these into <template-arg>:
  1394. //
  1395. // <template-arg> ::= L <source-name> [<template-args>]
  1396. // ::= L <source-name> [<template-args>] <expr-cast-value> E
  1397. //
  1398. // Merge them and we're done:
  1399. // <template-arg>
  1400. // ::= L <source-name> [<template-args>] [<expr-cast-value> E]
  1401. if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
  1402. copy = state->parse_state;
  1403. if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
  1404. return true;
  1405. }
  1406. state->parse_state = copy;
  1407. return true;
  1408. }
  1409. // Now that the overlapping cases can't reach this code, we can safely call
  1410. // both of these.
  1411. if (ParseType(state) || ParseExprPrimary(state)) {
  1412. return true;
  1413. }
  1414. state->parse_state = copy;
  1415. if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
  1416. ParseOneCharToken(state, 'E')) {
  1417. return true;
  1418. }
  1419. state->parse_state = copy;
  1420. return false;
  1421. }
  1422. // <unresolved-type> ::= <template-param> [<template-args>]
  1423. // ::= <decltype>
  1424. // ::= <substitution>
  1425. static inline bool ParseUnresolvedType(State *state) {
  1426. // No ComplexityGuard because we don't copy the state in this stack frame.
  1427. return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) ||
  1428. ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false);
  1429. }
  1430. // <simple-id> ::= <source-name> [<template-args>]
  1431. static inline bool ParseSimpleId(State *state) {
  1432. // No ComplexityGuard because we don't copy the state in this stack frame.
  1433. // Note: <simple-id> cannot be followed by a parameter pack; see comment in
  1434. // ParseUnresolvedType.
  1435. return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
  1436. }
  1437. // <base-unresolved-name> ::= <source-name> [<template-args>]
  1438. // ::= on <operator-name> [<template-args>]
  1439. // ::= dn <destructor-name>
  1440. static bool ParseBaseUnresolvedName(State *state) {
  1441. ComplexityGuard guard(state);
  1442. if (guard.IsTooComplex()) return false;
  1443. if (ParseSimpleId(state)) {
  1444. return true;
  1445. }
  1446. ParseState copy = state->parse_state;
  1447. if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
  1448. Optional(ParseTemplateArgs(state))) {
  1449. return true;
  1450. }
  1451. state->parse_state = copy;
  1452. if (ParseTwoCharToken(state, "dn") &&
  1453. (ParseUnresolvedType(state) || ParseSimpleId(state))) {
  1454. return true;
  1455. }
  1456. state->parse_state = copy;
  1457. return false;
  1458. }
  1459. // <unresolved-name> ::= [gs] <base-unresolved-name>
  1460. // ::= sr <unresolved-type> <base-unresolved-name>
  1461. // ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
  1462. // <base-unresolved-name>
  1463. // ::= [gs] sr <unresolved-qualifier-level>+ E
  1464. // <base-unresolved-name>
  1465. static bool ParseUnresolvedName(State *state) {
  1466. ComplexityGuard guard(state);
  1467. if (guard.IsTooComplex()) return false;
  1468. ParseState copy = state->parse_state;
  1469. if (Optional(ParseTwoCharToken(state, "gs")) &&
  1470. ParseBaseUnresolvedName(state)) {
  1471. return true;
  1472. }
  1473. state->parse_state = copy;
  1474. if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) &&
  1475. ParseBaseUnresolvedName(state)) {
  1476. return true;
  1477. }
  1478. state->parse_state = copy;
  1479. if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
  1480. ParseUnresolvedType(state) &&
  1481. OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
  1482. ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
  1483. return true;
  1484. }
  1485. state->parse_state = copy;
  1486. if (Optional(ParseTwoCharToken(state, "gs")) &&
  1487. ParseTwoCharToken(state, "sr") &&
  1488. OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
  1489. ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
  1490. return true;
  1491. }
  1492. state->parse_state = copy;
  1493. return false;
  1494. }
  1495. // <expression> ::= <1-ary operator-name> <expression>
  1496. // ::= <2-ary operator-name> <expression> <expression>
  1497. // ::= <3-ary operator-name> <expression> <expression> <expression>
  1498. // ::= cl <expression>+ E
  1499. // ::= cp <simple-id> <expression>* E # Clang-specific.
  1500. // ::= cv <type> <expression> # type (expression)
  1501. // ::= cv <type> _ <expression>* E # type (expr-list)
  1502. // ::= st <type>
  1503. // ::= <template-param>
  1504. // ::= <function-param>
  1505. // ::= <expr-primary>
  1506. // ::= dt <expression> <unresolved-name> # expr.name
  1507. // ::= pt <expression> <unresolved-name> # expr->name
  1508. // ::= sp <expression> # argument pack expansion
  1509. // ::= sr <type> <unqualified-name> <template-args>
  1510. // ::= sr <type> <unqualified-name>
  1511. // <function-param> ::= fp <(top-level) CV-qualifiers> _
  1512. // ::= fp <(top-level) CV-qualifiers> <number> _
  1513. // ::= fL <number> p <(top-level) CV-qualifiers> _
  1514. // ::= fL <number> p <(top-level) CV-qualifiers> <number> _
  1515. static bool ParseExpression(State *state) {
  1516. ComplexityGuard guard(state);
  1517. if (guard.IsTooComplex()) return false;
  1518. if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
  1519. return true;
  1520. }
  1521. ParseState copy = state->parse_state;
  1522. // Object/function call expression.
  1523. if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
  1524. ParseOneCharToken(state, 'E')) {
  1525. return true;
  1526. }
  1527. state->parse_state = copy;
  1528. // Clang-specific "cp <simple-id> <expression>* E"
  1529. // https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
  1530. if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
  1531. ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, 'E')) {
  1532. return true;
  1533. }
  1534. state->parse_state = copy;
  1535. // Function-param expression (level 0).
  1536. if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
  1537. Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
  1538. return true;
  1539. }
  1540. state->parse_state = copy;
  1541. // Function-param expression (level 1+).
  1542. if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
  1543. ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
  1544. Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
  1545. return true;
  1546. }
  1547. state->parse_state = copy;
  1548. // Parse the conversion expressions jointly to avoid re-parsing the <type> in
  1549. // their common prefix. Parsed as:
  1550. // <expression> ::= cv <type> <conversion-args>
  1551. // <conversion-args> ::= _ <expression>* E
  1552. // ::= <expression>
  1553. //
  1554. // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
  1555. // also needs to accept "cv <type>" in other contexts.
  1556. if (ParseTwoCharToken(state, "cv")) {
  1557. if (ParseType(state)) {
  1558. ParseState copy2 = state->parse_state;
  1559. if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
  1560. ParseOneCharToken(state, 'E')) {
  1561. return true;
  1562. }
  1563. state->parse_state = copy2;
  1564. if (ParseExpression(state)) {
  1565. return true;
  1566. }
  1567. }
  1568. } else {
  1569. // Parse unary, binary, and ternary operator expressions jointly, taking
  1570. // care not to re-parse subexpressions repeatedly. Parse like:
  1571. // <expression> ::= <operator-name> <expression>
  1572. // [<one-to-two-expressions>]
  1573. // <one-to-two-expressions> ::= <expression> [<expression>]
  1574. int arity = -1;
  1575. if (ParseOperatorName(state, &arity) &&
  1576. arity > 0 && // 0 arity => disabled.
  1577. (arity < 3 || ParseExpression(state)) &&
  1578. (arity < 2 || ParseExpression(state)) &&
  1579. (arity < 1 || ParseExpression(state))) {
  1580. return true;
  1581. }
  1582. }
  1583. state->parse_state = copy;
  1584. // sizeof type
  1585. if (ParseTwoCharToken(state, "st") && ParseType(state)) {
  1586. return true;
  1587. }
  1588. state->parse_state = copy;
  1589. // Object and pointer member access expressions.
  1590. if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
  1591. ParseExpression(state) && ParseType(state)) {
  1592. return true;
  1593. }
  1594. state->parse_state = copy;
  1595. // Pointer-to-member access expressions. This parses the same as a binary
  1596. // operator, but it's implemented separately because "ds" shouldn't be
  1597. // accepted in other contexts that parse an operator name.
  1598. if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
  1599. ParseExpression(state)) {
  1600. return true;
  1601. }
  1602. state->parse_state = copy;
  1603. // Parameter pack expansion
  1604. if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
  1605. return true;
  1606. }
  1607. state->parse_state = copy;
  1608. return ParseUnresolvedName(state);
  1609. }
  1610. // <expr-primary> ::= L <type> <(value) number> E
  1611. // ::= L <type> <(value) float> E
  1612. // ::= L <mangled-name> E
  1613. // // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
  1614. // ::= LZ <encoding> E
  1615. //
  1616. // Warning, subtle: the "bug" LZ production above is ambiguous with the first
  1617. // production where <type> starts with <local-name>, which can lead to
  1618. // exponential backtracking in two scenarios:
  1619. //
  1620. // - When whatever follows the E in the <local-name> in the first production is
  1621. // not a name, we backtrack the whole <encoding> and re-parse the whole thing.
  1622. //
  1623. // - When whatever follows the <local-name> in the first production is not a
  1624. // number and this <expr-primary> may be followed by a name, we backtrack the
  1625. // <name> and re-parse it.
  1626. //
  1627. // Moreover this ambiguity isn't always resolved -- for example, the following
  1628. // has two different parses:
  1629. //
  1630. // _ZaaILZ4aoeuE1x1EvE
  1631. // => operator&&<aoeu, x, E, void>
  1632. // => operator&&<(aoeu::x)(1), void>
  1633. //
  1634. // To resolve this, we just do what GCC's demangler does, and refuse to parse
  1635. // casts to <local-name> types.
  1636. static bool ParseExprPrimary(State *state) {
  1637. ComplexityGuard guard(state);
  1638. if (guard.IsTooComplex()) return false;
  1639. ParseState copy = state->parse_state;
  1640. // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
  1641. // or fail, no backtracking.
  1642. if (ParseTwoCharToken(state, "LZ")) {
  1643. if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
  1644. return true;
  1645. }
  1646. state->parse_state = copy;
  1647. return false;
  1648. }
  1649. // The merged cast production.
  1650. if (ParseOneCharToken(state, 'L') && ParseType(state) &&
  1651. ParseExprCastValue(state)) {
  1652. return true;
  1653. }
  1654. state->parse_state = copy;
  1655. if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
  1656. ParseOneCharToken(state, 'E')) {
  1657. return true;
  1658. }
  1659. state->parse_state = copy;
  1660. return false;
  1661. }
  1662. // <number> or <float>, followed by 'E', as described above ParseExprPrimary.
  1663. static bool ParseExprCastValue(State *state) {
  1664. ComplexityGuard guard(state);
  1665. if (guard.IsTooComplex()) return false;
  1666. // We have to be able to backtrack after accepting a number because we could
  1667. // have e.g. "7fffE", which will accept "7" as a number but then fail to find
  1668. // the 'E'.
  1669. ParseState copy = state->parse_state;
  1670. if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) {
  1671. return true;
  1672. }
  1673. state->parse_state = copy;
  1674. if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
  1675. return true;
  1676. }
  1677. state->parse_state = copy;
  1678. return false;
  1679. }
  1680. // <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
  1681. // ::= Z <(function) encoding> E s [<discriminator>]
  1682. //
  1683. // Parsing a common prefix of these two productions together avoids an
  1684. // exponential blowup of backtracking. Parse like:
  1685. // <local-name> := Z <encoding> E <local-name-suffix>
  1686. // <local-name-suffix> ::= s [<discriminator>]
  1687. // ::= <name> [<discriminator>]
  1688. static bool ParseLocalNameSuffix(State *state) {
  1689. ComplexityGuard guard(state);
  1690. if (guard.IsTooComplex()) return false;
  1691. if (MaybeAppend(state, "::") && ParseName(state) &&
  1692. Optional(ParseDiscriminator(state))) {
  1693. return true;
  1694. }
  1695. // Since we're not going to overwrite the above "::" by re-parsing the
  1696. // <encoding> (whose trailing '\0' byte was in the byte now holding the
  1697. // first ':'), we have to rollback the "::" if the <name> parse failed.
  1698. if (state->parse_state.append) {
  1699. state->out[state->parse_state.out_cur_idx - 2] = '\0';
  1700. }
  1701. return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
  1702. }
  1703. static bool ParseLocalName(State *state) {
  1704. ComplexityGuard guard(state);
  1705. if (guard.IsTooComplex()) return false;
  1706. ParseState copy = state->parse_state;
  1707. if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
  1708. ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
  1709. return true;
  1710. }
  1711. state->parse_state = copy;
  1712. return false;
  1713. }
  1714. // <discriminator> := _ <(non-negative) number>
  1715. static bool ParseDiscriminator(State *state) {
  1716. ComplexityGuard guard(state);
  1717. if (guard.IsTooComplex()) return false;
  1718. ParseState copy = state->parse_state;
  1719. if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
  1720. return true;
  1721. }
  1722. state->parse_state = copy;
  1723. return false;
  1724. }
  1725. // <substitution> ::= S_
  1726. // ::= S <seq-id> _
  1727. // ::= St, etc.
  1728. //
  1729. // "St" is special in that it's not valid as a standalone name, and it *is*
  1730. // allowed to precede a name without being wrapped in "N...E". This means that
  1731. // if we accept it on its own, we can accept "St1a" and try to parse
  1732. // template-args, then fail and backtrack, accept "St" on its own, then "1a" as
  1733. // an unqualified name and re-parse the same template-args. To block this
  1734. // exponential backtracking, we disable it with 'accept_std=false' in
  1735. // problematic contexts.
  1736. static bool ParseSubstitution(State *state, bool accept_std) {
  1737. ComplexityGuard guard(state);
  1738. if (guard.IsTooComplex()) return false;
  1739. if (ParseTwoCharToken(state, "S_")) {
  1740. MaybeAppend(state, "?"); // We don't support substitutions.
  1741. return true;
  1742. }
  1743. ParseState copy = state->parse_state;
  1744. if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
  1745. ParseOneCharToken(state, '_')) {
  1746. MaybeAppend(state, "?"); // We don't support substitutions.
  1747. return true;
  1748. }
  1749. state->parse_state = copy;
  1750. // Expand abbreviations like "St" => "std".
  1751. if (ParseOneCharToken(state, 'S')) {
  1752. const AbbrevPair *p;
  1753. for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
  1754. if (RemainingInput(state)[0] == p->abbrev[1] &&
  1755. (accept_std || p->abbrev[1] != 't')) {
  1756. MaybeAppend(state, "std");
  1757. if (p->real_name[0] != '\0') {
  1758. MaybeAppend(state, "::");
  1759. MaybeAppend(state, p->real_name);
  1760. }
  1761. ++state->parse_state.mangled_idx;
  1762. return true;
  1763. }
  1764. }
  1765. }
  1766. state->parse_state = copy;
  1767. return false;
  1768. }
  1769. // Parse <mangled-name>, optionally followed by either a function-clone suffix
  1770. // or version suffix. Returns true only if all of "mangled_cur" was consumed.
  1771. static bool ParseTopLevelMangledName(State *state) {
  1772. ComplexityGuard guard(state);
  1773. if (guard.IsTooComplex()) return false;
  1774. if (ParseMangledName(state)) {
  1775. if (RemainingInput(state)[0] != '\0') {
  1776. // Drop trailing function clone suffix, if any.
  1777. if (IsFunctionCloneSuffix(RemainingInput(state))) {
  1778. return true;
  1779. }
  1780. // Append trailing version suffix if any.
  1781. // ex. _Z3foo@@GLIBCXX_3.4
  1782. if (RemainingInput(state)[0] == '@') {
  1783. MaybeAppend(state, RemainingInput(state));
  1784. return true;
  1785. }
  1786. return false; // Unconsumed suffix.
  1787. }
  1788. return true;
  1789. }
  1790. return false;
  1791. }
  1792. static bool Overflowed(const State *state) {
  1793. return state->parse_state.out_cur_idx >= state->out_end_idx;
  1794. }
  1795. // The demangler entry point.
  1796. bool Demangle(const char* mangled, char* out, size_t out_size) {
  1797. State state;
  1798. InitState(&state, mangled, out, out_size);
  1799. return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
  1800. state.parse_state.out_cur_idx > 0;
  1801. }
  1802. } // namespace debugging_internal
  1803. Y_ABSL_NAMESPACE_END
  1804. } // namespace y_absl