# functions.py
  1. from .platform import get_behavior_family_for_platform
  2. import re
  3. _windecl_hash = re.compile(r"^@?(.*?)@[0-9]+$")
  4. _rust_hash = re.compile(r"::h[a-z0-9]{16}$")
  5. _cpp_trailer_re = re.compile(r"(\bconst\b|&)$")
  6. _rust_blanket_re = re.compile(r"^([A-Z] as )")
  7. _lambda_re = re.compile(
  8. r"""(?x)
  9. # gcc
  10. (?:
  11. \{
  12. lambda\(.*?\)\#\d+
  13. \}
  14. ) |
  15. # msvc
  16. (?:
  17. \blambda_[a-f0-9]{32}\b
  18. ) |
  19. # clang
  20. (?:
  21. \$_\d+\b
  22. )
  23. """
  24. )
  25. _anon_namespace_re = re.compile(
  26. r"""(?x)
  27. \?A0x[a-f0-9]{8}::
  28. """
  29. )
  30. PAIRS = {"(": ")", "{": "}", "[": "]", "<": ">"}
  31. def replace_enclosed_string(s, start, end, replacement=None):
  32. if start not in s:
  33. return s
  34. depth = 0
  35. rv = []
  36. pair_start = None
  37. for idx, char in enumerate(s):
  38. if char == start:
  39. if depth == 0:
  40. pair_start = idx
  41. depth += 1
  42. elif char == end:
  43. depth -= 1
  44. if depth == 0:
  45. if replacement is not None:
  46. if callable(replacement):
  47. rv.append(replacement(s[pair_start + 1 : idx], pair_start))
  48. else:
  49. rv.append(replacement)
  50. elif depth == 0:
  51. rv.append(char)
  52. return "".join(rv)
  53. def split_func_tokens(s):
  54. buf = []
  55. rv = []
  56. stack = []
  57. end = 0
  58. for idx, char in enumerate(s):
  59. if char in PAIRS:
  60. stack.append(PAIRS[char])
  61. elif stack and char == stack[-1]:
  62. stack.pop()
  63. if not stack:
  64. buf.append(s[end : idx + 1])
  65. end = idx + 1
  66. elif not stack:
  67. if char.isspace():
  68. if buf:
  69. rv.append(buf)
  70. buf = []
  71. else:
  72. buf.append(s[end : idx + 1])
  73. end = idx + 1
  74. if buf:
  75. rv.append(buf)
  76. return ["".join(x) for x in rv]
  77. def trim_function_name(function, platform, normalize_lambdas=True):
  78. """Given a function value from the frame's function attribute this returns
  79. a trimmed version that can be stored in `function_name`. This is only used
  80. if the client did not supply a value itself already.
  81. """
  82. if get_behavior_family_for_platform(platform) != "native":
  83. return function
  84. if function in ("<redacted>", "<unknown>"):
  85. return function
  86. original_function = function
  87. function = function.strip()
  88. # Ensure we don't operate on objc functions
  89. if function.startswith(("[", "+[", "-[")):
  90. return function
  91. # Chop off C++ trailers
  92. while True:
  93. match = _cpp_trailer_re.search(function)
  94. if match is None:
  95. break
  96. function = function[: match.start()].rstrip()
  97. # Because operator<< really screws with our balancing, so let's work
  98. # around that by replacing it with a character we do not observe in
  99. # `split_func_tokens` or `replace_enclosed_string`.
  100. function = (
  101. function.replace("operator<<", u"operator⟨⟨")
  102. .replace("operator<", u"operator⟨")
  103. .replace("operator()", u"operator◯")
  104. .replace(" -> ", u" ⟿ ")
  105. .replace("`anonymous namespace'", u"〔anonymousnamespace〕")
  106. )
  107. # normalize C++ lambdas. This is necessary because different
  108. # compilers use different rules for now to name a lambda and they are
  109. # all quite inconsistent. This does not give us perfect answers to
  110. # this problem but closer. In particular msvc will call a lambda
  111. # something like `lambda_deadbeefeefffeeffeeff` whereas clang for
  112. # instance will name it `main::$_0` which will tell us in which outer
  113. # function it was declared.
  114. if normalize_lambdas:
  115. function = _lambda_re.sub("lambda", function)
  116. # Normalize MSVC anonymous namespaces from inline functions. For inline
  117. # functions, the compiler inconsistently renders anonymous namespaces with
  118. # their hash. For regular functions, "`anonymous namespace'" is used.
  119. # The regular expression matches the trailing "::" to avoid accidental
  120. # replacement in mangled function names.
  121. if normalize_lambdas:
  122. function = _anon_namespace_re.sub(u"〔anonymousnamespace〕::", function)
  123. # Remove the arguments if there is one.
  124. def process_args(value, start):
  125. value = value.strip()
  126. if value in ("anonymous namespace", "operator"):
  127. return "(%s)" % value
  128. return ""
  129. function = replace_enclosed_string(function, "(", ")", process_args)
  130. # Resolve generic types, but special case rust which uses things like
  131. # <Foo as Bar>::baz to denote traits.
  132. def process_generics(value, start):
  133. # Special case for lambdas
  134. if value == "lambda" or _lambda_re.match(value):
  135. return "<%s>" % value
  136. if start > 0:
  137. return "<T>"
  138. # Rust special cases
  139. value = _rust_blanket_re.sub("", value) # prefer trait for blanket impls
  140. value = replace_enclosed_string(value, "<", ">", process_generics)
  141. return value.split(" as ", 1)[0]
  142. function = replace_enclosed_string(function, "<", ">", process_generics)
  143. tokens = split_func_tokens(function)
  144. # MSVC demangles generic operator functions with a space between the
  145. # function name and the generics. Ensure that those two components both end
  146. # up in the function name.
  147. if len(tokens) > 1 and tokens[-1] == "<T>":
  148. tokens.pop()
  149. tokens[-1] += " <T>"
  150. # find the token which is the function name. Since we chopped of C++
  151. # trailers there are only two cases we care about: the token left to
  152. # the -> return marker which is for instance used in Swift and if that
  153. # is not found, the last token in the last.
  154. #
  155. # ["unsigned", "int", "whatever"] -> whatever
  156. # ["@objc", "whatever", "->", "int"] -> whatever
  157. try:
  158. func_token = tokens[tokens.index(u"⟿") - 1]
  159. except ValueError:
  160. if tokens:
  161. func_token = tokens[-1]
  162. else:
  163. func_token = None
  164. if func_token:
  165. function = (
  166. func_token.replace(u"⟨", "<")
  167. .replace(u"◯", "()")
  168. .replace(u" ⟿ ", " -> ")
  169. .replace(u"〔anonymousnamespace〕", "`anonymous namespace'")
  170. )
  171. # This really should never happen
  172. else:
  173. function = original_function
  174. # trim off rust markers
  175. function = _rust_hash.sub("", function)
  176. # trim off windows decl markers
  177. return _windecl_hash.sub("\\1", function)
  178. def get_function_name_for_frame(frame, platform=None):
  179. """Given a frame object or dictionary this returns the actual function
  180. name trimmed.
  181. """
  182. if hasattr(frame, "get_raw_data"):
  183. frame = frame.get_raw_data()
  184. # if there is a raw function, prioritize the function unchanged
  185. if frame.get("raw_function"):
  186. return frame.get("function")
  187. # otherwise trim the function on demand
  188. rv = frame.get("function")
  189. if rv:
  190. return trim_function_name(rv, frame.get("platform") or platform)