scanner.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.scanner
  4. ~~~~~~~~~~~~~~~~
  5. This library implements a regex based scanner. Some languages
  6. like Pascal are easy to parse but have some keywords that
  7. depend on the context. Because of this it's impossible to lex
  8. that just by using a regular expression lexer like the
  9. `RegexLexer`.
  10. Have a look at the `DelphiLexer` to get an idea of how to use
  11. this scanner.
  12. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  13. :license: BSD, see LICENSE for details.
  14. """
  15. import re
  16. class EndOfText(RuntimeError):
  17. """
  18. Raise if end of text is reached and the user
  19. tried to call a match function.
  20. """
  21. class Scanner(object):
  22. """
  23. Simple scanner
  24. All method patterns are regular expression strings (not
  25. compiled expressions!)
  26. """
  27. def __init__(self, text, flags=0):
  28. """
  29. :param text: The text which should be scanned
  30. :param flags: default regular expression flags
  31. """
  32. self.data = text
  33. self.data_length = len(text)
  34. self.start_pos = 0
  35. self.pos = 0
  36. self.flags = flags
  37. self.last = None
  38. self.match = None
  39. self._re_cache = {}
  40. def eos(self):
  41. """`True` if the scanner reached the end of text."""
  42. return self.pos >= self.data_length
  43. eos = property(eos, eos.__doc__)
  44. def check(self, pattern):
  45. """
  46. Apply `pattern` on the current position and return
  47. the match object. (Doesn't touch pos). Use this for
  48. lookahead.
  49. """
  50. if self.eos:
  51. raise EndOfText()
  52. if pattern not in self._re_cache:
  53. self._re_cache[pattern] = re.compile(pattern, self.flags)
  54. return self._re_cache[pattern].match(self.data, self.pos)
  55. def test(self, pattern):
  56. """Apply a pattern on the current position and check
  57. if it patches. Doesn't touch pos.
  58. """
  59. return self.check(pattern) is not None
  60. def scan(self, pattern):
  61. """
  62. Scan the text for the given pattern and update pos/match
  63. and related fields. The return value is a boolen that
  64. indicates if the pattern matched. The matched value is
  65. stored on the instance as ``match``, the last value is
  66. stored as ``last``. ``start_pos`` is the position of the
  67. pointer before the pattern was matched, ``pos`` is the
  68. end position.
  69. """
  70. if self.eos:
  71. raise EndOfText()
  72. if pattern not in self._re_cache:
  73. self._re_cache[pattern] = re.compile(pattern, self.flags)
  74. self.last = self.match
  75. m = self._re_cache[pattern].match(self.data, self.pos)
  76. if m is None:
  77. return False
  78. self.start_pos = m.start()
  79. self.pos = m.end()
  80. self.match = m.group()
  81. return True
  82. def get_char(self):
  83. """Scan exactly one char."""
  84. self.scan('.')
  85. def __repr__(self):
  86. return '<%s %d/%d>' % (
  87. self.__class__.__name__,
  88. self.pos,
  89. self.data_length
  90. )