tokenutil.py

  1. """Token-related utilities"""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from collections import namedtuple
  5. from io import StringIO
  6. from keyword import iskeyword
  7. import tokenize
  8. from tokenize import TokenInfo
  9. from typing import List, Optional
  10. Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])
def generate_tokens(readline):
    """Wrap generate_tokens to catch EOF errors."""
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError:
        # catch EOF error
        return
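
# Illustrative usage (a sketch; the sample source string is an assumption, not
# part of the original file).  Tokenizing an incomplete snippet yields whatever
# tokens could be read and then stops silently instead of raising
# tokenize.TokenError:
#
#     from io import StringIO
#     for tok in generate_tokens(StringIO("result = compute(").readline):
#         print(tokenize.tok_name[tok.type], repr(tok.string))
#
# The exact tokens emitted before the EOF error depend on the Python version.
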
def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: Optional[List[str]] = None
):
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: List[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                start = tokens[-1].start[0], tokens[-1].end[0]
                end = start
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # Catch EOF
            raise
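
# Illustrative usage (a sketch; the sample source string is an assumption).  On
# Python versions where the tokenizer raises TokenError with a message such as
# "unterminated string literal" (e.g. 3.12), the stream ends with a synthetic
# ERRORTOKEN instead of propagating the exception:
#
#     from io import StringIO
#     toks = list(generate_tokens_catch_errors(StringIO('s = "abc').readline))
#     print(toks[-1])  # ERRORTOKEN on those versions
#
# TokenErrors whose messages match none of the listed substrings are re-raised.
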
def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith('\n'):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return (line, offset)
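
# Illustrative usage (a sketch; the sample text is an assumption).  With a
# cursor position of 8, the cursor falls inside the second line, whose first
# character sits at offset 6 in the cell:
#
#     line_at_cursor("first\nsecond\nthird", cursor_pos=8)
#     # -> ('second\n', 6)
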
def token_at_cursor(cell: str, cursor_pos: int = 0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found
    """
    names: List[str] = []
    tokens: List[Token] = []
    call_names = []

    offsets = {1: 0}  # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):
        tok = Token(*tup)
        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break

        if tok.token == tokenize.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize.OP and tokens[-1].text == '.':
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
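
# Illustrative usage (a sketch; the sample code strings are assumptions).  When
# the cursor sits inside a function call the callable is returned; otherwise
# the dotted name under the cursor wins:
#
#     token_at_cursor("print(foo.bar)", cursor_pos=11)  # cursor inside 'bar'
#     # -> 'print'
#     token_at_cursor("foo.bar", cursor_pos=5)
#     # -> 'foo.bar'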