__init__.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. """
  2. pygments.lexers
  3. ~~~~~~~~~~~~~~~
  4. Pygments lexers.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. import sys
  10. import types
  11. import fnmatch
  12. from os.path import basename
  13. from pygments.lexers._mapping import LEXERS
  14. from pygments.modeline import get_filetype_from_buffer
  15. from pygments.plugin import find_plugin_lexers
  16. from pygments.util import ClassNotFound, guess_decode
  # Legacy class names that stay importable from this package; each key is
  # resolved to the lexer named by its value (see ``_automodule.__getattr__``).
  COMPAT = {
      'Python3Lexer': 'PythonLexer',
      'Python3TracebackLexer': 'PythonTracebackLexer',
      'LeanLexer': 'Lean3Lexer',
  }

  # Public names: a handful of lookup helpers plus every builtin lexer key
  # and every compatibility alias.
  __all__ = [
      'get_lexer_by_name',
      'get_lexer_for_filename',
      'find_lexer_class',
      'guess_lexer',
      'load_lexer_from_file',
      *LEXERS,
      *COMPAT,
  ]

  # Lexer classes loaded so far, keyed by the class's ``name`` attribute.
  _lexer_cache = {}
  # Compiled filename patterns, keyed by the glob string they came from.
  _pattern_cache = {}
  def _fn_matches(fn, glob):
      """Match the file name *fn* against the shell-style pattern *glob*.

      The glob is translated to a regular expression on first use and the
      compiled pattern is kept in ``_pattern_cache`` for later calls.

      Returns the ``re`` match object if *fn* matches, otherwise ``None``.
      """
      try:
          compiled = _pattern_cache[glob]
      except KeyError:
          # First time this glob is seen: compile it and remember it.
          compiled = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
      return compiled.match(fn)
  def _load_lexers(module_name):
      """Import *module_name* and register every lexer it exports.

      Each name listed in the module's ``__all__`` is fetched from the module
      and stored in ``_lexer_cache`` under the class's ``name`` attribute, so
      loading one lexer also loads all others defined in the same module.
      """
      module = __import__(module_name, None, None, ['__all__'])
      exported = (getattr(module, attr) for attr in module.__all__)
      _lexer_cache.update((lexer_cls.name, lexer_cls) for lexer_cls in exported)
  def get_all_lexers(plugins=True):
      """Return a generator of tuples in the form ``(name, aliases,
      filenames, mimetypes)`` of all known lexers.

      If *plugins* is true (the default), plugin lexers supplied by entrypoints
      are also returned. Otherwise, only builtin ones are considered.
      """
      # Builtin entries carry the module name first; drop it from the result.
      yield from (entry[1:] for entry in LEXERS.values())
      if not plugins:
          return
      for plugin_cls in find_plugin_lexers():
          yield (plugin_cls.name, plugin_cls.aliases,
                 plugin_cls.filenames, plugin_cls.mimetypes)
  def find_lexer_class(name):
      """
      Return the `Lexer` subclass whose *name* attribute equals the *name*
      argument.

      Already-loaded lexers are served from the cache; otherwise builtin
      lexers are searched first, then plugin lexers. Returns ``None`` if no
      lexer with that name is found.
      """
      try:
          return _lexer_cache[name]
      except KeyError:
          pass
      # lookup builtin lexers
      for module_name, builtin_name, _, _, _ in LEXERS.values():
          if builtin_name == name:
              _load_lexers(module_name)
              return _lexer_cache[name]
      # continue with lexers from setuptools entrypoints
      return next(
          (plugin_cls for plugin_cls in find_plugin_lexers()
           if plugin_cls.name == name),
          None,
      )
  def find_lexer_class_by_name(_alias):
      """
      Return the `Lexer` subclass that has `alias` in its aliases list, without
      instantiating it.

      Like `get_lexer_by_name`, but does not instantiate the class.

      The alias is lower-cased before it is compared. Builtin lexers are
      checked first, then plugin lexers.

      Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias
      is found, or if the alias is empty.

      .. versionadded:: 2.2
      """
      if _alias:
          wanted = _alias.lower()
          # lookup builtin lexers
          for module_name, name, aliases, _, _ in LEXERS.values():
              if wanted not in aliases:
                  continue
              if name not in _lexer_cache:
                  _load_lexers(module_name)
              return _lexer_cache[name]
          # continue with lexers from setuptools entrypoints
          for plugin_cls in find_plugin_lexers():
              if wanted in plugin_cls.aliases:
                  return plugin_cls
      raise ClassNotFound('no lexer for alias %r found' % _alias)
  def get_lexer_by_name(_alias, **options):
      """
      Return an instance of a `Lexer` subclass that has `alias` in its
      aliases list. The lexer is given the `options` at its
      instantiation.

      Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias
      is found.
      """
      # The class lookup (builtins first, then plugins, same error message)
      # is exactly what find_lexer_class_by_name does; only instantiation
      # is added here.
      lexer_cls = find_lexer_class_by_name(_alias)
      return lexer_cls(**options)
  def load_lexer_from_file(filename, lexername="CustomLexer", **options):
      """Load a lexer from a file.

      This method expects a file located relative to the current working
      directory, which contains a Lexer class. By default, it expects the
      Lexer to be named CustomLexer; you can specify your own class name
      as the second argument to this function.

      Users should be very careful with the input, because this method
      is equivalent to running eval on the input file.

      Returns an instance of the class, created with *options*.

      Raises ClassNotFound if there are any problems importing the Lexer;
      the underlying error, if any, is attached as the exception's cause.

      .. versionadded:: 2.2
      """
      try:
          # This empty dict will contain the namespace for the exec'd file
          custom_namespace = {}
          with open(filename, 'rb') as f:
              # SECURITY: this executes arbitrary code from *filename*.
              # Only ever call this with files you trust.
              exec(f.read(), custom_namespace)
          # Retrieve the class `lexername` from that namespace
          if lexername not in custom_namespace:
              raise ClassNotFound('no valid %s class found in %s' %
                                  (lexername, filename))
          lexer_class = custom_namespace[lexername]
          # And finally instantiate it with the options
          return lexer_class(**options)
      except OSError as err:
          raise ClassNotFound('cannot read %s: %s' % (filename, err)) from err
      except ClassNotFound:
          # Already the right exception type (raised above); pass it through.
          raise
      except Exception as err:
          # Anything raised by the exec'd code or the lexer constructor is
          # reported uniformly, keeping the original error as the cause.
          raise ClassNotFound(
              'error when loading custom lexer: %s' % err) from err
  def find_lexer_class_for_filename(_fn, code=None):
      """Get a lexer for a filename.

      Only the base name of *_fn* is matched against the lexers' filename
      patterns. If multiple lexers match the filename pattern, use
      ``analyse_text()`` on *code* (when given) to figure out which one is
      more appropriate; without *code* the lexers' ``priority`` decides.
      Patterns without a ``*`` wildcard get a bonus in either case.

      *code* may be ``str`` or ``bytes``; bytes are decoded first.

      Returns None if not found.
      """
      matches = []
      fn = basename(_fn)
      for modname, name, _, filenames, _ in LEXERS.values():
          for filename in filenames:
              if _fn_matches(fn, filename):
                  if name not in _lexer_cache:
                      _load_lexers(modname)
                  matches.append((_lexer_cache[name], filename))
      for cls in find_plugin_lexers():
          for filename in cls.filenames:
              if _fn_matches(fn, filename):
                  matches.append((cls, filename))

      if isinstance(code, bytes):
          # decode it, since all analyse_text functions expect unicode.
          # guess_decode returns a (text, encoding) pair; keep only the text
          # so analyse_text is not handed a tuple.
          code, _ = guess_decode(code)

      def get_rating(info):
          cls, filename = info
          # explicit patterns get a bonus
          bonus = 0.5 if '*' not in filename else 0
          # The class _always_ defines analyse_text because it's included in
          # the Lexer class.  The default implementation returns None which
          # gets turned into 0.0.  Run scripts/detect_missing_analyse_text.py
          # to find lexers which need it overridden.
          if code:
              return cls.analyse_text(code) + bonus, cls.__name__
          return cls.priority + bonus, cls.__name__

      if matches:
          # Highest rating sorts last; the class name breaks ties.
          matches.sort(key=get_rating)
          return matches[-1][0]
      return None
  def get_lexer_for_filename(_fn, code=None, **options):
      """Get a lexer for a filename.

      Return a `Lexer` subclass instance that has a filename pattern
      matching `fn`. The lexer is given the `options` at its
      instantiation.

      Raise :exc:`pygments.util.ClassNotFound` if no lexer for that filename
      is found.

      If multiple lexers match the filename pattern, use their ``analyse_text()``
      methods to figure out which one is more appropriate.
      """
      lexer_cls = find_lexer_class_for_filename(_fn, code)
      if lexer_cls:
          return lexer_cls(**options)
      raise ClassNotFound('no lexer for filename %r found' % _fn)
  def get_lexer_for_mimetype(_mime, **options):
      """
      Return a `Lexer` subclass instance that has `mime` in its mimetype
      list. The lexer is given the `options` at its instantiation.

      Builtin lexers are checked first, then plugin lexers.

      Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that
      mimetype is found.
      """
      for modname, name, _, _, mimetypes in LEXERS.values():
          if _mime not in mimetypes:
              continue
          if name not in _lexer_cache:
              _load_lexers(modname)
          return _lexer_cache[name](**options)
      for plugin_cls in find_plugin_lexers():
          if _mime not in plugin_cls.mimetypes:
              continue
          return plugin_cls(**options)
      raise ClassNotFound('no lexer for mimetype %r found' % _mime)
  def _iter_lexerclasses(plugins=True):
      """Return an iterator over all lexer classes.

      Builtin lexers are yielded in sorted order of their ``LEXERS`` key and
      are loaded on demand. If *plugins* is true, plugin lexers follow.
      """
      for key in sorted(LEXERS):
          module_name, name = LEXERS[key][:2]
          try:
              lexer_cls = _lexer_cache[name]
          except KeyError:
              # Not loaded yet: import its module, which fills the cache.
              _load_lexers(module_name)
              lexer_cls = _lexer_cache[name]
          yield lexer_cls
      if plugins:
          yield from find_plugin_lexers()
  def guess_lexer_for_filename(_fn, _text, **options):
      """
      As :func:`guess_lexer()`, but only lexers which have a pattern in
      `filenames` or `alias_filenames` that matches the base name of `_fn`
      are taken into consideration.

      A single matching lexer is returned directly. Otherwise each candidate's
      ``analyse_text()`` is called on `_text`; a score of exactly 1.0 wins
      immediately, else the best-ranked candidate is instantiated.

      :exc:`pygments.util.ClassNotFound` is raised if no lexer matches the
      file name.
      """
      fn = basename(_fn)
      is_primary = {}
      candidates = set()
      for lexer in _iter_lexerclasses():
          for pattern in lexer.filenames:
              if _fn_matches(fn, pattern):
                  candidates.add(lexer)
                  is_primary[lexer] = True
          # NOTE(review): an alias match overwrites a primary match recorded
          # just above for the same lexer -- confirm this is intended.
          for pattern in lexer.alias_filenames:
              if _fn_matches(fn, pattern):
                  candidates.add(lexer)
                  is_primary[lexer] = False

      if not candidates:
          raise ClassNotFound('no lexer for filename %r found' % fn)
      if len(candidates) == 1:
          only = candidates.pop()
          return only(**options)

      scored = []
      for lexer in candidates:
          score = lexer.analyse_text(_text)
          if score == 1.0:
              return lexer(**options)
          scored.append((score, lexer))

      def type_sort(t):
          # sort by:
          # - analyse score
          # - is primary filename pattern?
          # - priority
          # - last resort: class name
          score, lexer = t
          return (score, is_primary[lexer], lexer.priority, lexer.__name__)

      scored.sort(key=type_sort)
      best = scored[-1][1]
      return best(**options)
  def guess_lexer(_text, **options):
      """
      Return a `Lexer` subclass instance that's guessed from the text in
      `text`. For that, the :meth:`.analyse_text()` method of every known lexer
      class is called with the text as argument, and the lexer which returned the
      highest value will be instantiated and returned.

      Non-``str`` input is decoded first, using the ``inencoding`` or
      ``encoding`` option if one is set and a guessed encoding otherwise.
      A file type found by ``get_filetype_from_buffer`` takes precedence over
      the analysis if a lexer with that name exists.

      :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can
      handle the content.
      """
      if not isinstance(_text, str):
          inencoding = options.get('inencoding', options.get('encoding'))
          if inencoding:
              _text = _text.decode(inencoding)
          else:
              _text, _ = guess_decode(_text)

      # try to get a vim modeline first
      ft = get_filetype_from_buffer(_text)
      if ft is not None:
          try:
              return get_lexer_by_name(ft, **options)
          except ClassNotFound:
              # Unknown file type in the modeline: fall back to analysis.
              pass

      top_score = 0.0
      top_lexer = None
      for lexer in _iter_lexerclasses():
          score = lexer.analyse_text(_text)
          if score == 1.0:
              # A perfect score cannot be beaten; stop searching.
              return lexer(**options)
          if score > top_score:
              top_score, top_lexer = score, lexer
      if not top_score or top_lexer is None:
          raise ClassNotFound('no lexer matching the text found')
      return top_lexer(**options)
  class _automodule(types.ModuleType):
      """Automatically import lexers."""

      def __getattr__(self, name):
          """Resolve a missing attribute *name* to a lexer class.

          A key of ``LEXERS`` triggers loading of the defining module; the
          class is then stored on this module object and returned. A key of
          ``COMPAT`` is resolved through the name it maps to. Any other name
          raises ``AttributeError``.
          """
          entry = LEXERS.get(name)
          if not entry:
              if name in COMPAT:
                  return getattr(self, COMPAT[name])
              raise AttributeError(name)
          _load_lexers(entry[0])
          lexer_cls = _lexer_cache[entry[1]]
          # Store it as a real attribute so __getattr__ is not hit again.
          setattr(self, name, lexer_cls)
          return lexer_cls
  # Swap this module's entry in ``sys.modules`` for an ``_automodule``
  # instance carrying the same namespace, so that attribute access for
  # not-yet-loaded lexers goes through ``_automodule.__getattr__``.
  oldmod = sys.modules[__name__]
  newmod = _automodule(__name__)
  vars(newmod).update(vars(oldmod))
  sys.modules[__name__] = newmod
  # Remove the bootstrap-only names from the replacement module's namespace.
  del newmod.newmod
  del newmod.oldmod
  del newmod.sys
  del newmod.types