document.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181
  1. """
  2. The `Document` that implements all the text operations/querying.
  3. """
  4. from __future__ import annotations
  5. import bisect
  6. import re
  7. import string
  8. import weakref
  9. from typing import Callable, Dict, Iterable, List, NoReturn, Pattern, cast
  10. from .clipboard import ClipboardData
  11. from .filters import vi_mode
  12. from .selection import PasteMode, SelectionState, SelectionType
  13. __all__ = [
  14. "Document",
  15. ]
  # Regexes for finding "words" in documents. A word is either a run of
  # alphanumeric/underscore characters or a run of other non-whitespace
  # characters; whitespace never belongs to a word.
  # (This is a 'word' in Vi.)
  _FIND_WORD_RE = re.compile(r"([a-zA-Z0-9_]+|[^a-zA-Z0-9_\s]+)")
  # Same pattern, anchored: the word has to start at position 0 of the input.
  _FIND_CURRENT_WORD_RE = re.compile(r"^([a-zA-Z0-9_]+|[^a-zA-Z0-9_\s]+)")
  # Anchored variant whose first group also captures the whitespace that
  # directly follows the word.
  _FIND_CURRENT_WORD_INCLUDE_TRAILING_WHITESPACE_RE = re.compile(
      r"^(([a-zA-Z0-9_]+|[^a-zA-Z0-9_\s]+)\s*)"
  )
  # Regexes for finding "WORDS" in documents: any run of non-whitespace
  # characters.
  # (This is a 'WORD' in Vi.)
  _FIND_BIG_WORD_RE = re.compile(r"([^\s]+)")
  # Anchored variant: the WORD has to start at position 0 of the input.
  _FIND_CURRENT_BIG_WORD_RE = re.compile(r"^([^\s]+)")
  # Anchored variant that also captures the whitespace following the WORD.
  _FIND_CURRENT_BIG_WORD_INCLUDE_TRAILING_WHITESPACE_RE = re.compile(r"^([^\s]+\s*)")
  # Share the Document._cache between all Document instances.
  # (Document instances are considered immutable. That means that if another
  # `Document` is constructed with the same text, it should have the same
  # `_DocumentCache`.)
  # The mapping is a `WeakValueDictionary`; the `cast` only gives it a plain
  # `dict` type for the type checker.
  _text_to_document_cache: dict[str, _DocumentCache] = cast(
      Dict[str, "_DocumentCache"],
      weakref.WeakValueDictionary(),  # Maps document.text to DocumentCache instance.
  )
  class _ImmutableLineList(List[str]):
      """
      List of lines that rejects in-place modification.

      The 'lines' list is stored in a cache that is shared between documents
      and is assumed to be immutable. This subclass makes every mutating list
      operation raise, which is useful for detecting obvious bugs.
      """

      def _error(self, *a: object, **kw: object) -> NoReturn:
          """Reject the attempted mutation.

          :raises NotImplementedError: always.
          """
          raise NotImplementedError("Attempt to modify an immutable list.")

      # Item/slice assignment and deletion.
      __setitem__ = _error  # type: ignore
      __delitem__ = _error  # type: ignore
      # Augmented assignment (`lines += [...]`, `lines *= 2`) would otherwise
      # mutate the list in place.
      __iadd__ = _error  # type: ignore
      __imul__ = _error  # type: ignore
      append = _error
      clear = _error
      extend = _error
      insert = _error
      pop = _error
      remove = _error
      reverse = _error
      sort = _error  # type: ignore
  class _DocumentCache:
      """
      Holder for lazily computed, text-derived data of a document.

      Both attributes start as `None`, meaning "not computed yet". Instances
      are stored as values of the module-level `WeakValueDictionary`, so they
      have to remain weak-referenceable.
      """

      def __init__(self) -> None:
          #: List of lines for the Document text.
          self.lines: _ImmutableLineList | None = None
          #: List of index positions, pointing to the start of all the lines.
          self.line_indexes: list[int] | None = None
  60. class Document:
  61. """
  62. This is an immutable class around the text and cursor position, and contains
  63. methods for querying this data, e.g. to give the text before the cursor.
  64. This class is usually instantiated by a :class:`~prompt_toolkit.buffer.Buffer`
  65. object, and accessed as the `document` property of that class.
  66. :param text: string
  67. :param cursor_position: int
  68. :param selection: :class:`.SelectionState`
  69. """
  70. __slots__ = ("_text", "_cursor_position", "_selection", "_cache")
  def __init__(
      self,
      text: str = "",
      cursor_position: int | None = None,
      selection: SelectionState | None = None,
  ) -> None:
      """
      Store text, cursor position and selection, and attach the shared cache.

      :param text: The document text.
      :param cursor_position: Index of the cursor in `text`. `None` places
          the cursor at the end of the text.
      :param selection: Optional :class:`.SelectionState`.
      """
      # Check cursor position. It can also be right after the end. (Where we
      # insert text.)
      # NOTE(review): only the upper bound is checked; a negative
      # `cursor_position` passes this assertion.
      assert cursor_position is None or cursor_position <= len(text), AssertionError(
          f"cursor_position={cursor_position!r}, len_text={len(text)!r}"
      )

      # By default, if no cursor position was given, put the cursor at the
      # end of the document. This is what makes sense in most places.
      if cursor_position is None:
          cursor_position = len(text)

      # Keep these attributes private. A `Document` really has to be
      # considered to be immutable, because otherwise the caching will break
      # things. Because of that, we wrap these into read-only properties.
      self._text = text
      self._cursor_position = cursor_position
      self._selection = selection

      # Cache for lines/indexes. (Shared with other Document instances that
      # contain the same text.)
      try:
          self._cache = _text_to_document_cache[self.text]
      except KeyError:
          self._cache = _DocumentCache()
          _text_to_document_cache[self.text] = self._cache

      # XX: For some reason, above, we can't use 'WeakValueDictionary.setdefault'.
      #     This fails in Pypy3. `self._cache` becomes None, because that's what
      #     'setdefault' returns.
      # self._cache = _text_to_document_cache.setdefault(self.text, _DocumentCache())
      # assert self._cache
  def __repr__(self) -> str:
      """Return `ClassName(text, cursor_position)` using the reprs of both values."""
      class_name = self.__class__.__name__
      return f"{class_name}({self.text!r}, {self.cursor_position!r})"
  def __eq__(self, other: object) -> bool:
      """
      Compare with another object.

      Two documents are equal when text, cursor position and selection are
      all equal. Anything that is not a `Document` compares unequal.
      """
      if isinstance(other, Document):
          same_text = self.text == other.text
          return (
              same_text
              and self.cursor_position == other.cursor_position
              and self.selection == other.selection
          )
      return False
  @property
  def text(self) -> str:
      """The complete text of this document (read-only)."""
      return self._text
  @property
  def cursor_position(self) -> int:
      """Index of the cursor within the text (read-only)."""
      return self._cursor_position
  @property
  def selection(self) -> SelectionState | None:
      """The :class:`.SelectionState` given at construction, or `None`."""
      return self._selection
  @property
  def current_char(self) -> str:
      """Character at the cursor position, or an empty string if there is none."""
      char = self._get_char_relative_to_cursor(0)
      return char if char else ""
  @property
  def char_before_cursor(self) -> str:
      """
      Character at offset -1 from the cursor, as reported by
      `_get_char_relative_to_cursor`; an empty string when that is empty.
      """
      char = self._get_char_relative_to_cursor(-1)
      return char if char else ""
  @property
  def text_before_cursor(self) -> str:
      """All text in front of the cursor position."""
      end = self.cursor_position
      return self.text[:end]
  @property
  def text_after_cursor(self) -> str:
      """All text from the cursor position up to the end of the document."""
      start = self.cursor_position
      return self.text[start:]
  @property
  def current_line_before_cursor(self) -> str:
      """Part of the current line that lies in front of the cursor."""
      # Everything after the last newline that precedes the cursor.
      return self.text_before_cursor.rsplit("\n", 1)[-1]
  @property
  def current_line_after_cursor(self) -> str:
      """Part of the current line from the cursor up to the line end."""
      # Everything before the first newline that follows the cursor.
      return self.text_after_cursor.split("\n", 1)[0]
  @property
  def lines(self) -> list[str]:
      """
      All lines of the document (the text split on "\\n").
      """
      # Computed once per cache object, because this is reused very often.
      cache = self._cache
      cached = cache.lines
      if cached is None:
          cached = cache.lines = _ImmutableLineList(self.text.split("\n"))
      return cached
  @property
  def _line_start_indexes(self) -> list[int]:
      """
      Start index in the text of every line, in line order.
      """
      # Cached, because this is often reused. (If it is used, it's often used
      # many times. And this has to be fast for editing big documents!)
      cache = self._cache
      if cache.line_indexes is None:
          starts: list[int] = []
          offset = 0
          for line in self.lines:
              starts.append(offset)
              # The next line begins after this line and its newline.
              offset += len(line) + 1

          # An empty list of lines still yields a single start index.
          if not starts:
              starts.append(0)

          cache.line_indexes = starts

      return cache.line_indexes
  @property
  def lines_from_current(self) -> list[str]:
      """
      The lines from the cursor's line up to and including the last line.
      """
      all_lines = self.lines
      first_row = self.cursor_position_row
      return all_lines[first_row:]
  @property
  def line_count(self) -> int:
      r"""Number of lines in this document.

      A trailing \n counts as the beginning of a new (empty) line."""
      all_lines = self.lines
      return len(all_lines)
  @property
  def current_line(self) -> str:
      """The text of the line the cursor is on. (For single-line input this
      equals `text`.)"""
      before = self.current_line_before_cursor
      after = self.current_line_after_cursor
      return "".join((before, after))
  @property
  def leading_whitespace_in_current_line(self) -> str:
      """The whitespace prefix (left margin) of the current line."""
      line = self.current_line
      without_margin = line.lstrip()
      margin_width = len(line) - len(without_margin)
      return line[:margin_width]
  def _get_char_relative_to_cursor(self, offset: int = 0) -> str:
      """
      Return the character at `cursor_position + offset`.

      :param offset: Position relative to the cursor (may be negative).
      :returns: The character, or an empty string when the resulting index
          lies outside the text.
      """
      index = self.cursor_position + offset
      text = self.text
      # Bounds are checked explicitly: a negative index must not wrap around
      # to the end of the text (e.g. offset -1 with the cursor at 0).
      if 0 <= index < len(text):
          return text[index]
      return ""
  @property
  def on_first_line(self) -> bool:
      """
      Whether the cursor is on the first line (row 0).
      """
      row = self.cursor_position_row
      return row == 0
  @property
  def on_last_line(self) -> bool:
      """
      Whether the cursor is on the last line of the document.
      """
      last_row = self.line_count - 1
      return self.cursor_position_row == last_row
  @property
  def cursor_position_row(self) -> int:
      """
      Row of the cursor. (0-based.)
      """
      return self._find_line_start_index(self.cursor_position)[0]
  @property
  def cursor_position_col(self) -> int:
      """
      Column of the cursor. (0-based.)
      """
      # (Deliberately not derived from self.text_before_cursor: creating
      # substrings and splitting them is too expensive for getting the
      # cursor position.)
      cursor = self.cursor_position
      line_start = self._find_line_start_index(cursor)[1]
      return cursor - line_start
  def _find_line_start_index(self, index: int) -> tuple[int, int]:
      """
      Locate the line containing the character at `index`.

      Return a `(row, line_start)` tuple, where `line_start` is the index of
      the first character on that line.
      """
      starts = self._line_start_indexes
      # The last start index that is <= `index` belongs to the wanted line.
      row = bisect.bisect_right(starts, index) - 1
      return row, starts[row]
  def translate_index_to_position(self, index: int) -> tuple[int, int]:
      """
      Convert an index in the text into a `(row, col)` tuple.
      (0-based. Returns (0, 0) for index=0.)
      """
      row, line_start = self._find_line_start_index(index)
      return row, index - line_start
  def translate_row_col_to_index(self, row: int, col: int) -> int:
      """
      Given a (row, col) tuple, return the corresponding index.
      (Row and col params are 0-based.)

      Negative row/col values are turned into zero. A row past the last line
      is turned into the last line, and a column past the end of the line
      into the end of that line.

      :param row: 0-based line number.
      :param col: 0-based column within that line.
      :returns: Index into the text, between 0 and `len(text)` inclusive.
      """
      lines = self.lines

      # Clamp the row explicitly. Indexing with a negative row would select
      # a line counted from the end instead of the first line.
      row = max(0, min(row, len(lines) - 1))

      result = self._line_start_indexes[row]
      result += max(0, min(col, len(lines[row])))

      # Keep in range. (len(self.text) is included, because the cursor can be
      # right after the end of the text as well.)
      return max(0, min(result, len(self.text)))
  @property
  def is_cursor_at_the_end(self) -> bool:
      """Whether the cursor is positioned right after the last character."""
      text_length = len(self.text)
      return self.cursor_position == text_length
  @property
  def is_cursor_at_the_end_of_line(self) -> bool:
      """Whether the cursor is at the end of its line, i.e. the current
      character is a newline or there is no current character."""
      char = self.current_char
      return char == "\n" or char == ""
  def has_match_at_current_position(self, sub: str) -> bool:
      """
      `True` when this substring is found at the cursor position.

      :param sub: The substring to look for.
      """
      # `startswith` only inspects the characters at the cursor; `find`
      # would scan the rest of the text when there is no match here.
      return self.text.startswith(sub, self.cursor_position)
  def find(
      self,
      sub: str,
      in_current_line: bool = False,
      include_current_position: bool = False,
      ignore_case: bool = False,
      count: int = 1,
  ) -> int | None:
      """
      Find `sub` after the cursor, return position relative to the cursor
      position. Return `None` if nothing was found.

      :param sub: The literal substring to search for.
      :param in_current_line: Only search in the rest of the current line.
      :param include_current_position: Also accept a match that starts at
          the cursor itself.
      :param ignore_case: Search case-insensitively.
      :param count: Find the n-th occurrence.
      """
      assert isinstance(ignore_case, bool)

      if in_current_line:
          text = self.current_line_after_cursor
      else:
          text = self.text_after_cursor

      # Number of characters dropped from the front of the searched text.
      skipped = 0
      if not include_current_position:
          if not text:
              return None  # (Otherwise, we always get a match for the empty string.)
          text = text[1:]
          skipped = 1

      flags = re.IGNORECASE if ignore_case else 0
      matches = re.finditer(re.escape(sub), text, flags)

      for number, match in enumerate(matches, 1):
          if number == count:
              return match.start(0) + skipped
      return None
  def find_all(self, sub: str, ignore_case: bool = False) -> list[int]:
      """
      Return the absolute positions in the document of all occurrences of
      the literal substring `sub`.
      """
      flags = re.IGNORECASE if ignore_case else 0
      pattern = re.compile(re.escape(sub), flags)
      return [match.start() for match in pattern.finditer(self.text)]
  def find_backwards(
      self,
      sub: str,
      in_current_line: bool = False,
      ignore_case: bool = False,
      count: int = 1,
  ) -> int | None:
      """
      Find `sub` before the cursor, return position relative to the cursor
      position. Return `None` if nothing was found.

      :param sub: The literal substring to search for.
      :param in_current_line: Only search in the part of the current line
          before the cursor.
      :param ignore_case: Search case-insensitively.
      :param count: Find the n-th occurrence.
      """
      if in_current_line:
          before_cursor = self.current_line_before_cursor
      else:
          before_cursor = self.text_before_cursor

      flags = re.IGNORECASE if ignore_case else 0

      # Search the reversed needle in the reversed text, so that matches are
      # produced in order of increasing distance from the cursor.
      matches = re.finditer(re.escape(sub[::-1]), before_cursor[::-1], flags)

      for number, match in enumerate(matches, 1):
          if number == count:
              return -match.start(0) - len(sub)
      return None
  def get_word_before_cursor(
      self, WORD: bool = False, pattern: Pattern[str] | None = None
  ) -> str:
      """
      Return the word that ends at the cursor.

      An empty string is returned when the word before the cursor is
      complete (e.g. whitespace or nothing in front of the cursor).

      :param pattern: (None or compiled regex). When given, use this regex
          pattern.
      """
      if self._is_word_before_cursor_complete(WORD=WORD, pattern=pattern):
          return ""

      before = self.text_before_cursor
      # Negative offset of the word start relative to the cursor.
      offset = self.find_start_of_previous_word(WORD=WORD, pattern=pattern) or 0
      return before[len(before) + offset :]
  def _is_word_before_cursor_complete(
      self, WORD: bool = False, pattern: Pattern[str] | None = None
  ) -> bool:
      """
      Whether there is no unfinished word directly in front of the cursor.

      With a `pattern`: true when no previous word can be found with it.
      Without: true when the text before the cursor is empty or ends with
      whitespace.
      """
      if pattern:
          return self.find_start_of_previous_word(WORD=WORD, pattern=pattern) is None

      before = self.text_before_cursor
      return before == "" or before[-1:].isspace()
  def find_start_of_previous_word(
      self, count: int = 1, WORD: bool = False, pattern: Pattern[str] | None = None
  ) -> int | None:
      """
      Return an index relative to the cursor position pointing to the start
      of the previous word. Return `None` if nothing was found.

      :param count: Find the n-th previous word.
      :param WORD: Use Vi 'WORD' semantics (any run of non-whitespace).
      :param pattern: (None or compiled regex). When given, use this regex
          pattern. It is applied to the reversed text. Cannot be combined
          with `WORD`.
      """
      assert not (WORD and pattern)

      # Reverse the text before the cursor, in order to do an efficient
      # backwards search.
      reversed_text = self.text_before_cursor[::-1]

      if pattern:
          regex = pattern
      elif WORD:
          regex = _FIND_BIG_WORD_RE
      else:
          regex = _FIND_WORD_RE

      for number, match in enumerate(regex.finditer(reversed_text), 1):
          if number == count:
              # The end of the match in the reversed text is the start of
              # the word in the original text.
              return -match.end(0)
      return None
  def find_boundaries_of_current_word(
      self,
      WORD: bool = False,
      include_leading_whitespace: bool = False,
      include_trailing_whitespace: bool = False,
  ) -> tuple[int, int]:
      """
      Return the boundaries `(startpos, endpos)`, relative to the cursor, of
      the word under the cursor. Only the current line is considered,
      because line boundaries never belong to a word.
      If not on a word, this returns (0,0)
      """
      # The part before the cursor is reversed so that an anchored regex
      # matches backwards, starting at the cursor.
      reversed_before = self.current_line_before_cursor[::-1]
      after = self.current_line_after_cursor

      # Keyed on (WORD, include_whitespace).
      regex_table = {
          (False, False): _FIND_CURRENT_WORD_RE,
          (False, True): _FIND_CURRENT_WORD_INCLUDE_TRAILING_WHITESPACE_RE,
          (True, False): _FIND_CURRENT_BIG_WORD_RE,
          (True, True): _FIND_CURRENT_BIG_WORD_INCLUDE_TRAILING_WHITESPACE_RE,
      }
      before_match = regex_table[(WORD, include_leading_whitespace)].search(
          reversed_before
      )
      after_match = regex_table[(WORD, include_trailing_whitespace)].search(after)

      # For regular words with text on both sides of the cursor: the
      # characters left and right of the cursor must be of the same kind
      # (both in the [a-zA-Z0-9_] alphabet or both outside of it). If not,
      # the part before the cursor belongs to a different word; drop it.
      if before_match and after_match and not WORD:
          word_chars = string.ascii_letters + "0123456789_"
          left_is_word_char = self.text[self.cursor_position - 1] in word_chars
          right_is_word_char = self.text[self.cursor_position] in word_chars

          if left_is_word_char != right_is_word_char:
              before_match = None

      start = -before_match.end(1) if before_match else 0
      end = after_match.end(1) if after_match else 0
      return start, end
  def get_word_under_cursor(self, WORD: bool = False) -> str:
      """
      Return the word the cursor is currently on.
      The result is an empty string when the cursor is on a whitespace region.
      """
      start, end = self.find_boundaries_of_current_word(WORD=WORD)
      cursor = self.cursor_position
      return self.text[cursor + start : cursor + end]
  def find_next_word_beginning(
      self, count: int = 1, WORD: bool = False
  ) -> int | None:
      """
      Return an index relative to the cursor position pointing to the start
      of the next word. Return `None` if nothing was found.

      :param count: Find the n-th next word. A negative count searches
          backwards through `find_previous_word_beginning`.
      :param WORD: Use Vi 'WORD' semantics (any run of non-whitespace).
      """
      if count < 0:
          return self.find_previous_word_beginning(count=-count, WORD=WORD)

      regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE

      for i, match in enumerate(regex.finditer(self.text_after_cursor)):
          # Take first match, unless it's the word on which we're right now.
          if i == 0 and match.start(1) == 0:
              count += 1

          if i + 1 == count:
              return match.start(1)
      return None
  def find_next_word_ending(
      self, include_current_position: bool = False, count: int = 1, WORD: bool = False
  ) -> int | None:
      """
      Return an index relative to the cursor position pointing to the end
      of the next word. Return `None` if nothing was found.

      :param include_current_position: When false, the character under the
          cursor is skipped before searching.
      :param count: Find the n-th word ending. A negative count searches
          backwards through `find_previous_word_ending`.
      :param WORD: Use Vi 'WORD' semantics (any run of non-whitespace).
      """
      if count < 0:
          return self.find_previous_word_ending(count=-count, WORD=WORD)

      text = self.text_after_cursor
      # Number of characters dropped from the front of the searched text.
      skipped = 0
      if not include_current_position:
          text = text[1:]
          skipped = 1

      regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE

      for number, match in enumerate(regex.finditer(text), 1):
          if number == count:
              return match.end(1) + skipped
      return None
  def find_previous_word_beginning(
      self, count: int = 1, WORD: bool = False
  ) -> int | None:
      """
      Return an index relative to the cursor position pointing to the start
      of the previous word. Return `None` if nothing was found.

      :param count: Find the n-th previous word. A negative count searches
          forwards through `find_next_word_beginning`.
      :param WORD: Use Vi 'WORD' semantics (any run of non-whitespace).
      """
      if count < 0:
          return self.find_next_word_beginning(count=-count, WORD=WORD)

      regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE

      # Search in the reversed text: the end of a match there is the start
      # of the word in the original text.
      reversed_text = self.text_before_cursor[::-1]

      for number, match in enumerate(regex.finditer(reversed_text), 1):
          if number == count:
              return -match.end(1)
      return None
  def find_previous_word_ending(
      self, count: int = 1, WORD: bool = False
  ) -> int | None:
      """
      Return an index relative to the cursor position pointing to the end
      of the previous word. Return `None` if nothing was found.

      :param count: Find the n-th previous word ending. A negative count
          searches forwards through `find_next_word_ending`.
      :param WORD: Use Vi 'WORD' semantics (any run of non-whitespace).
      """
      if count < 0:
          return self.find_next_word_ending(count=-count, WORD=WORD)

      # The character under the cursor, followed by the reversed text before
      # the cursor.
      reversed_text = self.text_after_cursor[:1] + self.text_before_cursor[::-1]

      regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE

      for i, match in enumerate(regex.finditer(reversed_text)):
          # Take first match, unless it's the word on which we're right now.
          if i == 0 and match.start(1) == 0:
              count += 1

          if i + 1 == count:
              return -match.start(1) + 1
      return None
  def find_next_matching_line(
      self, match_func: Callable[[str], bool], count: int = 1
  ) -> int | None:
      """
      Look downwards for lines for which `match_func` returns true.

      Return the line index of the `count`-th matching line, relative to
      the current line. When fewer than `count` lines match, the last
      matching line is returned; `None` when no line matches.
      """
      found = None
      remaining = count
      lines_below = self.lines[self.cursor_position_row + 1 :]

      for distance, line in enumerate(lines_below, 1):
          if match_func(line):
              found = distance
              remaining -= 1

          if remaining == 0:
              break

      return found
  def find_previous_matching_line(
      self, match_func: Callable[[str], bool], count: int = 1
  ) -> int | None:
      """
      Look upwards for lines for which `match_func` returns true.

      Return the (negative) line index of the `count`-th matching line,
      relative to the current line. When fewer than `count` lines match,
      the last matching line is returned; `None` when no line matches.
      """
      found = None
      remaining = count
      lines_above = self.lines[: self.cursor_position_row]

      for distance, line in enumerate(reversed(lines_above), 1):
          if match_func(line):
              found = -distance
              remaining -= 1

          if remaining == 0:
              break

      return found
  def get_cursor_left_position(self, count: int = 1) -> int:
      """
      Relative position for moving the cursor `count` characters to the
      left, without leaving the current line. A negative count moves right.
      """
      if count < 0:
          return self.get_cursor_right_position(-count)

      steps = min(self.cursor_position_col, count)
      return -steps
  def get_cursor_right_position(self, count: int = 1) -> int:
      """
      Relative position for moving the cursor `count` characters to the
      right, without leaving the current line. A negative count moves left.
      """
      if count < 0:
          return self.get_cursor_left_position(-count)

      available = len(self.current_line_after_cursor)
      return min(count, available)
  def get_cursor_up_position(
      self, count: int = 1, preferred_column: int | None = None
  ) -> int:
      """
      Return the relative cursor position (character index) for moving the
      cursor `count` lines up, as if the arrow-up button was pressed.

      :param preferred_column: When given, go to this column instead of
                               staying at the current column.
      """
      assert count >= 1

      if preferred_column is None:
          column = self.cursor_position_col
      else:
          column = preferred_column

      # Never go above the first line.
      target_row = max(0, self.cursor_position_row - count)
      target_index = self.translate_row_col_to_index(target_row, column)
      return target_index - self.cursor_position
  def get_cursor_down_position(
      self, count: int = 1, preferred_column: int | None = None
  ) -> int:
      """
      Return the relative cursor position (character index) for moving the
      cursor `count` lines down, as if the arrow-down button was pressed.

      :param preferred_column: When given, go to this column instead of
                               staying at the current column.
      """
      assert count >= 1

      if preferred_column is None:
          column = self.cursor_position_col
      else:
          column = preferred_column

      target_row = self.cursor_position_row + count
      target_index = self.translate_row_col_to_index(target_row, column)
      return target_index - self.cursor_position
  def find_enclosing_bracket_right(
      self, left_ch: str, right_ch: str, end_pos: int | None = None
  ) -> int | None:
      """
      Find the closing bracket that encloses the current position. Return
      its position relative to the cursor, or `None` when there is none.

      When `end_pos` is given, don't look past the position.
      """
      if self.current_char == right_ch:
          return 0

      text = self.text
      cursor = self.cursor_position
      limit = len(text) if end_pos is None else min(len(text), end_pos)

      # Nesting depth: brackets opened after the cursor have to be closed
      # before the enclosing one can be reached.
      depth = 1

      # Look forward.
      for index in range(cursor + 1, limit):
          char = text[index]

          if char == left_ch:
              depth += 1
          elif char == right_ch:
              depth -= 1
              if depth == 0:
                  return index - cursor

      return None
  def find_enclosing_bracket_left(
      self, left_ch: str, right_ch: str, start_pos: int | None = None
  ) -> int | None:
      """
      Find the opening bracket that encloses the current position. Return
      its position relative to the cursor, or `None` when there is none.

      When `start_pos` is given, don't look past the position.
      """
      if self.current_char == left_ch:
          return 0

      text = self.text
      cursor = self.cursor_position
      limit = 0 if start_pos is None else max(0, start_pos)

      # Nesting depth: brackets closed before the cursor have to be opened
      # before the enclosing one can be reached.
      depth = 1

      # Look backward.
      for index in range(cursor - 1, limit - 1, -1):
          char = text[index]

          if char == right_ch:
              depth += 1
          elif char == left_ch:
              depth -= 1
              if depth == 0:
                  return index - cursor

      return None
  def find_matching_bracket_position(
      self, start_pos: int | None = None, end_pos: int | None = None
  ) -> int:
      """
      Return the relative cursor position of the bracket matching the one
      under the cursor. Handles (), [], {} and <>. Returns 0 when the cursor
      is not on a bracket or no matching bracket was found.

      When `start_pos` or `end_pos` are given. Don't look past the positions.
      """
      char = self.current_char

      for opening, closing in ("()", "[]", "{}", "<>"):
          if char == opening:
              return (
                  self.find_enclosing_bracket_right(opening, closing, end_pos=end_pos)
                  or 0
              )
          if char == closing:
              return (
                  self.find_enclosing_bracket_left(
                      opening, closing, start_pos=start_pos
                  )
                  or 0
              )

      return 0
  def get_start_of_document_position(self) -> int:
      """Relative position of the start of the document (zero or negative)."""
      cursor = self.cursor_position
      return -cursor
  def get_end_of_document_position(self) -> int:
      """Relative position of the end of the document."""
      text_length = len(self.text)
      return text_length - self.cursor_position
  def get_start_of_line_position(self, after_whitespace: bool = False) -> int:
      """
      Relative position of the start of the current line.

      :param after_whitespace: When true, target the first character after
          the leading whitespace of the line instead of column 0.
      """
      if not after_whitespace:
          return -len(self.current_line_before_cursor)

      line = self.current_line
      margin_width = len(line) - len(line.lstrip())
      return margin_width - self.cursor_position_col
  def get_end_of_line_position(self) -> int:
      """Relative position of the end of the current line."""
      rest_of_line = self.current_line_after_cursor
      return len(rest_of_line)
  def last_non_blank_of_current_line_position(self) -> int:
      """
      Relative position of the last non-blank character of the current line.
      """
      trimmed_length = len(self.current_line.rstrip())
      return trimmed_length - self.cursor_position_col - 1
  def get_column_cursor_position(self, column: int) -> int:
      """
      Return the relative cursor position for moving to `column` on the
      current line. (The target is kept between the boundaries of the
      line, for negative or too large numbers.)
      """
      line_length = len(self.current_line)
      current_column = self.cursor_position_col

      target = min(line_length, column)
      if target < 0:
          target = 0

      return target - current_column
  def selection_range(self) -> tuple[int, int]:
      """
      Return (from, to) tuple of the selection.
      start and end position are included.

      Without a selection, both values are the cursor position.

      This doesn't take the selection type into account. Use
      `selection_ranges` instead.
      """
      # XXX: shouldn't this return `None` if there is no selection???
      cursor = self.cursor_position
      selection = self.selection

      if not selection:
          return cursor, cursor

      anchor = selection.original_cursor_position
      if anchor < cursor:
          return anchor, cursor
      return cursor, anchor
  def selection_ranges(self) -> Iterable[tuple[int, int]]:
      """
      Yield `(from, to)` tuples for the selection; nothing is yielded when
      nothing was selected. The upper boundary is not included.

      A BLOCK selection yields one tuple per line that reaches the left
      edge of the block. This can produce zero-width ranges, like (8, 8),
      for empty lines in a block selection. Any other selection type
      yields exactly one tuple.
      """
      selection = self.selection
      if not selection:
          return

      start, end = sorted(
          (self.cursor_position, selection.original_cursor_position)
      )

      if selection.type == SelectionType.BLOCK:
          first_row, start_col = self.translate_index_to_position(start)
          last_row, end_col = self.translate_index_to_position(end)
          left, right = sorted((start_col, end_col))
          all_lines = self.lines

          # In Vi mode, the right-most column is part of the block.
          if vi_mode():
              right += 1

          for row in range(first_row, last_row + 1):
              width = len(all_lines[row])
              # Lines ending before the left edge of the block are skipped.
              if left > width:
                  continue
              yield (
                  self.translate_row_col_to_index(row, left),
                  self.translate_row_col_to_index(row, min(width, right)),
              )
          return

      # In case of a LINES selection, go to the start/end of the lines.
      if selection.type == SelectionType.LINES:
          text = self.text
          start = max(0, text.rfind("\n", 0, start) + 1)
          newline_at = text.find("\n", end)
          end = newline_at if newline_at >= 0 else len(text) - 1

      # In Vi mode, the upper boundary is always included. For Emacs,
      # that's not the case.
      if vi_mode():
          end += 1

      yield start, end
  def selection_range_at_line(self, row: int) -> tuple[int, int] | None:
      """
      Return the `(from, to)` columns of the selection on the given line.

      The returned upper boundary is not included in the selection, so
      `(0, 0)` is an empty selection and `(0, 1)` is a one character
      selection.

      :param row: Index of the line to inspect.
      :returns: `None` when there is no selection, or when the selection
          doesn't cover this line at all.
      """
      selection = self.selection
      if not selection:
          return None

      line_text = self.lines[row]
      line_start = self.translate_row_col_to_index(row, 0)
      line_end = self.translate_row_col_to_index(row, len(line_text))

      low, high = sorted(
          (self.cursor_position, selection.original_cursor_position)
      )

      # Intersect the index range of this line with the selection.
      span_start = max(line_start, low)
      span_end = min(line_end, high)
      if span_start > span_end:
          return None

      if selection.type == SelectionType.LINES:
          # A LINES selection always covers the complete line.
          span_start, span_end = line_start, line_end
      elif selection.type == SelectionType.BLOCK:
          _, col_a = self.translate_index_to_position(low)
          _, col_b = self.translate_index_to_position(high)
          left_col, right_col = sorted((col_a, col_b))

          if left_col > len(line_text):
              return None  # Block selection doesn't cross this line.

          span_start = self.translate_row_col_to_index(row, left_col)
          span_end = self.translate_row_col_to_index(row, right_col)

      _, from_column = self.translate_index_to_position(span_start)
      _, to_column = self.translate_index_to_position(span_end)

      # In Vi mode, the upper boundary is always included. For Emacs
      # mode, that's not the case.
      if vi_mode():
          to_column += 1

      return from_column, to_column
  def cut_selection(self) -> tuple[Document, ClipboardData]:
      """
      Return a (:class:`.Document`, :class:`.ClipboardData`) tuple, where the
      document represents the new document when the selection is cut, and the
      clipboard data represents whatever has to be put on the clipboard.

      Without a selection, this document itself is returned together with
      empty clipboard data. Otherwise the cursor of the new document is
      placed at the start of the first range that was cut, and the cut
      ranges are joined with newlines to form the clipboard text.
      """
      if not self.selection:
          return self, ClipboardData("")

      cut_parts = []
      remaining_parts = []
      new_cursor_position = self.cursor_position
      last_to = 0

      for index, (from_, to) in enumerate(self.selection_ranges()):
          # Only the first range decides where the cursor ends up. Testing
          # `last_to == 0` for this would also match the second range when
          # the first one is the empty range (0, 0), e.g. for a block
          # selection that starts on an empty first line.
          if index == 0:
              new_cursor_position = from_

          # Keep everything between the previous range and this one.
          remaining_parts.append(self.text[last_to:from_])
          cut_parts.append(self.text[from_:to])
          last_to = to

      remaining_parts.append(self.text[last_to:])

      cut_text = "\n".join(cut_parts)
      remaining_text = "".join(remaining_parts)

      # In case of a LINES selection, don't include the trailing newline.
      if self.selection.type == SelectionType.LINES and cut_text.endswith("\n"):
          cut_text = cut_text[:-1]

      return (
          Document(text=remaining_text, cursor_position=new_cursor_position),
          ClipboardData(cut_text, self.selection.type),
      )
  def paste_clipboard_data(
      self,
      data: ClipboardData,
      paste_mode: PasteMode = PasteMode.EMACS,
      count: int = 1,
  ) -> Document:
      """
      Return a new :class:`.Document` instance which contains the result if
      we would paste this data at the current cursor position.

      :param data: The clipboard data to paste; its `type` selects between
          character, line and block pasting.
      :param paste_mode: Where to paste. (Before/after/emacs.)
      :param count: When >1, Paste multiple times.
      """
      paste_before = paste_mode == PasteMode.VI_BEFORE
      paste_after = paste_mode == PasteMode.VI_AFTER
      kind = data.type

      if kind == SelectionType.CHARACTERS:
          if paste_after:
              # Vi "after": insert behind the character under the cursor.
              split_at = self.cursor_position + 1
              head, tail = self.text[:split_at], self.text[split_at:]
          else:
              head, tail = self.text_before_cursor, self.text_after_cursor

          new_text = head + data.text * count + tail
          new_cursor_position = self.cursor_position + len(data.text) * count
          if paste_before:
              new_cursor_position -= 1

      elif kind == SelectionType.LINES:
          row = self.cursor_position_row
          # "Before" inserts above the cursor line, anything else below it.
          insert_at = row if paste_before else row + 1

          leading_lines = self.lines[:insert_at]
          trailing_lines = self.lines[insert_at:]
          new_text = "\n".join(leading_lines + [data.text] * count + trailing_lines)

          # Characters of the leading lines plus one newline for each.
          new_cursor_position = len("".join(leading_lines)) + insert_at

      elif kind == SelectionType.BLOCK:
          rows = self.lines[:]
          first_row = self.cursor_position_row
          column = self.cursor_position_col + (0 if paste_before else 1)

          for offset, fragment in enumerate(data.text.split("\n")):
              target = first_row + offset
              if target >= len(rows):
                  rows.append("")

              # Pad short lines so that the block lands in the right column.
              padded = rows[target].ljust(column)
              rows[target] = padded[:column] + fragment * count + padded[column:]

          new_text = "\n".join(rows)
          new_cursor_position = self.cursor_position + (0 if paste_before else 1)

      return Document(text=new_text, cursor_position=new_cursor_position)
  def empty_line_count_at_the_end(self) -> int:
      """
      Count the trailing lines of the document that are empty or consist
      of whitespace only.
      """
      trailing = 0
      for text in reversed(self.lines):
          # Stop at the first line (from the bottom) with real content.
          if text and not text.isspace():
              break
          trailing += 1
      return trailing
  def start_of_paragraph(self, count: int = 1, before: bool = False) -> int:
      """
      Return the start of the current paragraph. (Relative cursor position.)

      :param count: Passed on to the search for the previous blank line.
      :param before: When true, don't add the one position that is
          otherwise added to the location of the blank line.
      :returns: A value that is never positive. When no blank line is
          found, the offset of the start of the document.
      """

      def is_blank(line: str) -> bool:
          return not line or line.isspace()

      relative_row = self.find_previous_matching_line(
          match_func=is_blank, count=count
      )
      if not relative_row:
          return -self.cursor_position

      adjustment = 0 if before else 1
      offset = self.get_cursor_up_position(count=-relative_row) + adjustment
      return min(0, offset)
  def end_of_paragraph(self, count: int = 1, after: bool = False) -> int:
      """
      Return the end of the current paragraph. (Relative cursor position.)

      :param count: Passed on to the search for the next blank line.
      :param after: When true, don't subtract the one position that is
          otherwise subtracted from the location of the blank line.
      :returns: A value that is never negative. When no blank line is
          found, the length of the text after the cursor.
      """

      def is_blank(line: str) -> bool:
          return not line or line.isspace()

      relative_row = self.find_next_matching_line(match_func=is_blank, count=count)
      if not relative_row:
          return len(self.text_after_cursor)

      adjustment = 0 if after else 1
      offset = self.get_cursor_down_position(count=relative_row) - adjustment
      return max(0, offset)
  940. # Modifiers.
  def insert_after(self, text: str) -> Document:
      """
      Create a new document, with this text inserted after the buffer.

      The cursor position and the selection are passed on unchanged.

      :param text: The text to append.
      """
      extended_text = self.text + text
      return Document(
          text=extended_text,
          cursor_position=self.cursor_position,
          selection=self.selection,
      )
  def insert_before(self, text: str) -> Document:
      """
      Create a new document, with this text inserted before the buffer.

      The cursor position and the selection's original cursor position are
      both shifted by the length of the inserted text, which keeps them in
      sync with the characters they pointed at.

      :param text: The text to prepend.
      """
      shift = len(text)

      shifted_selection = self.selection
      if shifted_selection:
          shifted_selection = SelectionState(
              original_cursor_position=(
                  shifted_selection.original_cursor_position + shift
              ),
              type=shifted_selection.type,
          )

      return Document(
          text=text + self.text,
          cursor_position=self.cursor_position + shift,
          selection=shifted_selection,
      )