  1. """Token-related utilities"""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from __future__ import absolute_import, print_function
  5. from collections import namedtuple
  6. from io import StringIO
  7. from keyword import iskeyword
  8. from . import tokenize2
  9. from .py3compat import cast_unicode_py2
  10. Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])
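# The fields mirror the 5-tuples produced by the tokenize module:
# (token type, token text, (start row, start col), (end row, end col), source line).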

def generate_tokens(readline):
    """wrap generate_tokens to catch EOF errors"""
    try:
        for token in tokenize2.generate_tokens(readline):
            yield token
    except tokenize2.TokenError:
        # catch EOF error
        return
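
# Usage sketch (illustration only, not part of the original module): feeding
# incomplete input such as "foo(" would normally make the tokenizer raise
# TokenError at EOF; the wrapper above yields whatever tokens it could
# produce and then stops quietly.
#
#     tokens = list(generate_tokens(StringIO(u"foo(").readline))
#     # -> the NAME and OP tokens for "foo" and "(", with no exception raised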

def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell: str
        multiline block of text
    cursor_pos: integer
        the cursor position

    Returns
    -------
    (line, offset): (text, integer)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if next_offset >= cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return (line, offset)
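
# Example (a hedged sketch, not part of the original module): with the text
# "foo\nbar\n" and a cursor at position 5 (inside the second line), the line
# containing the cursor should be returned along with the offset of its first
# character.
#
#     line_at_cursor("foo\nbar\n", 5)   # -> ("bar\n", 4)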

def token_at_cursor(cell, cursor_pos=0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : unicode
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found
    """
    cell = cast_unicode_py2(cell)
    names = []
    tokens = []
    call_names = []
    offsets = {1: 0}  # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):

        tok = Token(*tup)

        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break

        if tok.token == tokenize2.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize2.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
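
# Illustrative calls (a hedged sketch, not part of the original module; the
# expected results follow from the docstring's rules above):
#
#     token_at_cursor("func(a, b)", 6)    # -> 'func'
#         # the cursor sits inside the call, so the callable is returned
#     token_at_cursor("hello.world", 8)   # -> 'hello.world'
#         # dotted names are joined into a single token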