123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- from .platform import get_behavior_family_for_platform
- import re
- _windecl_hash = re.compile(r"^@?(.*?)@[0-9]+$")
- _rust_hash = re.compile(r"::h[a-z0-9]{16}$")
- _cpp_trailer_re = re.compile(r"(\bconst\b|&)$")
- _rust_blanket_re = re.compile(r"^([A-Z] as )")
- _lambda_re = re.compile(
- r"""(?x)
- # gcc
- (?:
- \{
- lambda\(.*?\)\#\d+
- \}
- ) |
- # msvc
- (?:
- \blambda_[a-f0-9]{32}\b
- ) |
- # clang
- (?:
- \$_\d+\b
- )
- """
- )
- _anon_namespace_re = re.compile(
- r"""(?x)
- \?A0x[a-f0-9]{8}::
- """
- )
- PAIRS = {"(": ")", "{": "}", "[": "]", "<": ">"}
def replace_enclosed_string(s, start, end, replacement=None):
    """Drop or replace every outermost ``start``...``end`` span in ``s``.

    Characters outside of any span are copied through.  Each outermost span
    (delimiters included) is removed; if ``replacement`` is given it is
    inserted in its place.  A callable ``replacement`` is invoked as
    ``replacement(inner_text, start_index)`` where ``inner_text`` is the text
    between the delimiters and ``start_index`` the position of the opening
    delimiter in ``s``; its return value is inserted.

    If ``start`` does not occur in ``s`` the string is returned unchanged.
    """
    if start not in s:
        return s

    pieces = []
    nesting = 0
    opened_at = None

    for pos, ch in enumerate(s):
        if ch == start:
            # Only the outermost opener marks the beginning of a span.
            if nesting == 0:
                opened_at = pos
            nesting += 1
            continue

        if ch == end:
            nesting -= 1
            # Emit the replacement once the outermost span has been closed.
            if nesting != 0 or replacement is None:
                continue
            if callable(replacement):
                pieces.append(replacement(s[opened_at + 1 : pos], opened_at))
            else:
                pieces.append(replacement)
            continue

        # Plain characters survive only while no span is open.
        if nesting == 0:
            pieces.append(ch)

    return "".join(pieces)
def split_func_tokens(s):
    """Split ``s`` on whitespace that is not enclosed in brackets.

    Bracket pairs are taken from ``PAIRS``.  Anything between an opening
    bracket and its matching closer (whitespace included) stays attached to
    the token it belongs to.  Returns the list of token strings.
    """
    tokens = []
    current = []   # pieces of the token being assembled
    closers = []   # closing brackets we are still waiting for
    consumed = 0   # index up to which `s` has been handed out already

    for pos, ch in enumerate(s):
        if ch in PAIRS:
            closers.append(PAIRS[ch])
        elif closers:
            # Inside brackets only the expected closer is of interest.
            if ch == closers[-1]:
                closers.pop()
                if not closers:
                    # The whole bracketed run joins the current token.
                    current.append(s[consumed : pos + 1])
                    consumed = pos + 1
        else:
            if not ch.isspace():
                current.append(s[consumed : pos + 1])
            elif current:
                # Top-level whitespace terminates the current token.
                tokens.append("".join(current))
                current = []
            consumed = pos + 1

    if current:
        tokens.append("".join(current))

    return tokens
def trim_function_name(function, platform, normalize_lambdas=True):
    """Reduce a raw frame ``function`` value to a short function name.

    The result is intended to be stored as ``function_name`` and is only
    used when the client did not supply such a value itself.

    Only platforms whose behavior family is ``"native"`` are processed.  For
    any other platform, and for the ``"<redacted>"`` / ``"<unknown>"``
    placeholders, ``function`` is returned unchanged.  Names that look like
    Objective-C selectors (starting with ``[``, ``+[`` or ``-[``) are returned
    with surrounding whitespace stripped and are otherwise left alone.

    When ``normalize_lambdas`` is true, compiler specific lambda names are
    rewritten to ``lambda`` and hashed anonymous namespaces are rewritten to
    the regular "`anonymous namespace'" spelling.
    """
    if get_behavior_family_for_platform(
        platform
    ) != "native" or function in ("<redacted>", "<unknown>"):
        return function

    untouched = function
    function = function.strip()

    # Objective-C functions are not something we want to operate on.
    if function.startswith(("[", "+[", "-[")):
        return function

    # Peel C++ trailers off the end, one at a time, until none is left.
    trailer = _cpp_trailer_re.search(function)
    while trailer is not None:
        function = function[: trailer.start()].rstrip()
        trailer = _cpp_trailer_re.search(function)

    # `operator<<` and friends would throw off the bracket balancing done by
    # `split_func_tokens` and `replace_enclosed_string`, so they are swapped
    # for characters those helpers do not look at.  Order matters here:
    # `operator<<` has to be handled before `operator<`.
    for needle, placeholder in (
        ("operator<<", u"operator⟨⟨"),
        ("operator<", u"operator⟨"),
        ("operator()", u"operator◯"),
        (" -> ", u" ⟿ "),
        ("`anonymous namespace'", u"〔anonymousnamespace〕"),
    ):
        function = function.replace(needle, placeholder)

    if normalize_lambdas:
        # Normalize C++ lambdas.  Compilers use different and rather
        # inconsistent rules for how to name a lambda.  This does not give a
        # perfect answer but gets closer: msvc produces something like
        # `lambda_deadbeefeefffeeffeeff` whereas clang for instance produces
        # `main::$_0`, which also tells in which outer function the lambda
        # was declared.
        function = _lambda_re.sub("lambda", function)

        # Normalize MSVC anonymous namespaces from inline functions.  For
        # inline functions the compiler inconsistently renders anonymous
        # namespaces with their hash; for regular functions
        # "`anonymous namespace'" is used.  The pattern includes the trailing
        # "::" to avoid accidental replacement in mangled function names.
        function = _anon_namespace_re.sub(u"〔anonymousnamespace〕::", function)

    def strip_arguments(inner, _offset):
        # Argument lists are dropped; two special parenthesized forms stay.
        inner = inner.strip()
        if inner in ("anonymous namespace", "operator"):
            return "(%s)" % inner
        return ""

    function = replace_enclosed_string(function, "(", ")", strip_arguments)

    def collapse_generics(inner, offset):
        # Lambdas inside angle brackets are kept verbatim.
        if inner == "lambda" or _lambda_re.match(inner):
            return "<%s>" % inner

        # Generics that do not start the string collapse to a placeholder.
        if offset > 0:
            return "<T>"

        # Rust uses things like `<Foo as Bar>::baz` to denote traits.  For
        # blanket impls the trait is preferred; otherwise the part before
        # " as " is kept.
        inner = _rust_blanket_re.sub("", inner)
        inner = replace_enclosed_string(inner, "<", ">", collapse_generics)
        return inner.partition(" as ")[0]

    function = replace_enclosed_string(function, "<", ">", collapse_generics)

    tokens = split_func_tokens(function)

    # MSVC demangles generic operator functions with a space between the
    # function name and the generics.  Make sure both components end up in
    # the function name.
    if len(tokens) > 1 and tokens[-1] == "<T>":
        tokens[-2:] = [tokens[-2] + " <T>"]

    # Find the token which is the function name.  Since the C++ trailers were
    # chopped off there are only two cases to care about: the token to the
    # left of the -> return marker (used for instance in Swift) and, if there
    # is no such marker, the last token in the list.
    #
    # ["unsigned", "int", "whatever"] -> whatever
    # ["@objc", "whatever", "->", "int"] -> whatever
    if u"⟿" in tokens:
        func_token = tokens[tokens.index(u"⟿") - 1]
    elif tokens:
        func_token = tokens[-1]
    else:
        func_token = None

    if func_token:
        # Undo the placeholder substitutions from above.
        function = func_token
        for placeholder, literal in (
            (u"⟨", "<"),
            (u"◯", "()"),
            (u" ⟿ ", " -> "),
            (u"〔anonymousnamespace〕", "`anonymous namespace'"),
        ):
            function = function.replace(placeholder, literal)
    else:
        # This really should never happen.
        function = untouched

    # Trim off rust markers first, then windows decl markers.
    function = _rust_hash.sub("", function)
    return _windecl_hash.sub("\\1", function)
def get_function_name_for_frame(frame, platform=None):
    """Return the trimmed function name for a frame object or dictionary.

    Objects exposing ``get_raw_data`` are unwrapped through it first.  If the
    frame carries a truthy ``raw_function`` its ``function`` value is returned
    as-is.  Otherwise ``function`` is trimmed on demand, using the frame's own
    ``platform`` and falling back to the ``platform`` argument.  ``None`` is
    returned when the frame has no (truthy) ``function``.
    """
    data = frame.get_raw_data() if hasattr(frame, "get_raw_data") else frame

    # A raw function means `function` was already processed: pass it through.
    if data.get("raw_function"):
        return data.get("function")

    name = data.get("function")
    if not name:
        return None

    return trim_function_name(name, data.get("platform") or platform)
|