__init__.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers
  4. ~~~~~~~~~~~~~~~
  5. Pygments lexers.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. import sys
  11. import types
  12. import fnmatch
  13. from os.path import basename
  14. from pygments.lexers._mapping import LEXERS
  15. from pygments.modeline import get_filetype_from_buffer
  16. from pygments.plugin import find_plugin_lexers
  17. from pygments.util import ClassNotFound, itervalues, guess_decode, text_type
# Backwards-compatibility aliases: maps an old attribute name to the lexer
# class name that should be returned for it (see ``_automodule.__getattr__``).
COMPAT = {
    'Python3Lexer': 'PythonLexer',
    'Python3TracebackLexer': 'PythonTracebackLexer',
}

# Public names: a handful of lookup helpers plus every key of LEXERS and COMPAT.
__all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
           'guess_lexer', 'load_lexer_from_file'] + list(LEXERS) + list(COMPAT)

# Lexer classes loaded so far, keyed by the class's ``name`` attribute.
_lexer_cache = {}
# Compiled regular expressions for filename globs, keyed by the glob string.
_pattern_cache = {}
  26. def _fn_matches(fn, glob):
  27. """Return whether the supplied file name fn matches pattern filename."""
  28. if glob not in _pattern_cache:
  29. pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
  30. return pattern.match(fn)
  31. return _pattern_cache[glob].match(fn)
  32. def _load_lexers(module_name):
  33. """Load a lexer (and all others in the module too)."""
  34. mod = __import__(module_name, None, None, ['__all__'])
  35. for lexer_name in mod.__all__:
  36. cls = getattr(mod, lexer_name)
  37. _lexer_cache[cls.name] = cls
  38. def get_all_lexers():
  39. """Return a generator of tuples in the form ``(name, aliases,
  40. filenames, mimetypes)`` of all know lexers.
  41. """
  42. for item in itervalues(LEXERS):
  43. yield item[1:]
  44. for lexer in find_plugin_lexers():
  45. yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes
  46. def find_lexer_class(name):
  47. """Lookup a lexer class by name.
  48. Return None if not found.
  49. """
  50. if name in _lexer_cache:
  51. return _lexer_cache[name]
  52. # lookup builtin lexers
  53. for module_name, lname, aliases, _, _ in itervalues(LEXERS):
  54. if name == lname:
  55. _load_lexers(module_name)
  56. return _lexer_cache[name]
  57. # continue with lexers from setuptools entrypoints
  58. for cls in find_plugin_lexers():
  59. if cls.name == name:
  60. return cls
  61. def find_lexer_class_by_name(_alias):
  62. """Lookup a lexer class by alias.
  63. Like `get_lexer_by_name`, but does not instantiate the class.
  64. .. versionadded:: 2.2
  65. """
  66. if not _alias:
  67. raise ClassNotFound('no lexer for alias %r found' % _alias)
  68. # lookup builtin lexers
  69. for module_name, name, aliases, _, _ in itervalues(LEXERS):
  70. if _alias.lower() in aliases:
  71. if name not in _lexer_cache:
  72. _load_lexers(module_name)
  73. return _lexer_cache[name]
  74. # continue with lexers from setuptools entrypoints
  75. for cls in find_plugin_lexers():
  76. if _alias.lower() in cls.aliases:
  77. return cls
  78. raise ClassNotFound('no lexer for alias %r found' % _alias)
  79. def get_lexer_by_name(_alias, **options):
  80. """Get a lexer by an alias.
  81. Raises ClassNotFound if not found.
  82. """
  83. if not _alias:
  84. raise ClassNotFound('no lexer for alias %r found' % _alias)
  85. # lookup builtin lexers
  86. for module_name, name, aliases, _, _ in itervalues(LEXERS):
  87. if _alias.lower() in aliases:
  88. if name not in _lexer_cache:
  89. _load_lexers(module_name)
  90. return _lexer_cache[name](**options)
  91. # continue with lexers from setuptools entrypoints
  92. for cls in find_plugin_lexers():
  93. if _alias.lower() in cls.aliases:
  94. return cls(**options)
  95. raise ClassNotFound('no lexer for alias %r found' % _alias)
  96. def load_lexer_from_file(filename, lexername="CustomLexer", **options):
  97. """Load a lexer from a file.
  98. This method expects a file located relative to the current working
  99. directory, which contains a Lexer class. By default, it expects the
  100. Lexer to be name CustomLexer; you can specify your own class name
  101. as the second argument to this function.
  102. Users should be very careful with the input, because this method
  103. is equivalent to running eval on the input file.
  104. Raises ClassNotFound if there are any problems importing the Lexer.
  105. .. versionadded:: 2.2
  106. """
  107. try:
  108. # This empty dict will contain the namespace for the exec'd file
  109. custom_namespace = {}
  110. with open(filename, 'rb') as f:
  111. exec(f.read(), custom_namespace)
  112. # Retrieve the class `lexername` from that namespace
  113. if lexername not in custom_namespace:
  114. raise ClassNotFound('no valid %s class found in %s' %
  115. (lexername, filename))
  116. lexer_class = custom_namespace[lexername]
  117. # And finally instantiate it with the options
  118. return lexer_class(**options)
  119. except IOError as err:
  120. raise ClassNotFound('cannot read %s: %s' % (filename, err))
  121. except ClassNotFound:
  122. raise
  123. except Exception as err:
  124. raise ClassNotFound('error when loading custom lexer: %s' % err)
  125. def find_lexer_class_for_filename(_fn, code=None):
  126. """Get a lexer for a filename.
  127. If multiple lexers match the filename pattern, use ``analyse_text()`` to
  128. figure out which one is more appropriate.
  129. Returns None if not found.
  130. """
  131. matches = []
  132. fn = basename(_fn)
  133. for modname, name, _, filenames, _ in itervalues(LEXERS):
  134. for filename in filenames:
  135. if _fn_matches(fn, filename):
  136. if name not in _lexer_cache:
  137. _load_lexers(modname)
  138. matches.append((_lexer_cache[name], filename))
  139. for cls in find_plugin_lexers():
  140. for filename in cls.filenames:
  141. if _fn_matches(fn, filename):
  142. matches.append((cls, filename))
  143. if sys.version_info > (3,) and isinstance(code, bytes):
  144. # decode it, since all analyse_text functions expect unicode
  145. code = guess_decode(code)
  146. def get_rating(info):
  147. cls, filename = info
  148. # explicit patterns get a bonus
  149. bonus = '*' not in filename and 0.5 or 0
  150. # The class _always_ defines analyse_text because it's included in
  151. # the Lexer class. The default implementation returns None which
  152. # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py
  153. # to find lexers which need it overridden.
  154. if code:
  155. return cls.analyse_text(code) + bonus, cls.__name__
  156. return cls.priority + bonus, cls.__name__
  157. if matches:
  158. matches.sort(key=get_rating)
  159. # print "Possible lexers, after sort:", matches
  160. return matches[-1][0]
  161. def get_lexer_for_filename(_fn, code=None, **options):
  162. """Get a lexer for a filename.
  163. If multiple lexers match the filename pattern, use ``analyse_text()`` to
  164. figure out which one is more appropriate.
  165. Raises ClassNotFound if not found.
  166. """
  167. res = find_lexer_class_for_filename(_fn, code)
  168. if not res:
  169. raise ClassNotFound('no lexer for filename %r found' % _fn)
  170. return res(**options)
  171. def get_lexer_for_mimetype(_mime, **options):
  172. """Get a lexer for a mimetype.
  173. Raises ClassNotFound if not found.
  174. """
  175. for modname, name, _, _, mimetypes in itervalues(LEXERS):
  176. if _mime in mimetypes:
  177. if name not in _lexer_cache:
  178. _load_lexers(modname)
  179. return _lexer_cache[name](**options)
  180. for cls in find_plugin_lexers():
  181. if _mime in cls.mimetypes:
  182. return cls(**options)
  183. raise ClassNotFound('no lexer for mimetype %r found' % _mime)
  184. def _iter_lexerclasses(plugins=True):
  185. """Return an iterator over all lexer classes."""
  186. for key in sorted(LEXERS):
  187. module_name, name = LEXERS[key][:2]
  188. if name not in _lexer_cache:
  189. _load_lexers(module_name)
  190. yield _lexer_cache[name]
  191. if plugins:
  192. for lexer in find_plugin_lexers():
  193. yield lexer
  194. def guess_lexer_for_filename(_fn, _text, **options):
  195. """
  196. Lookup all lexers that handle those filenames primary (``filenames``)
  197. or secondary (``alias_filenames``). Then run a text analysis for those
  198. lexers and choose the best result.
  199. usage::
  200. >>> from pygments.lexers import guess_lexer_for_filename
  201. >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
  202. <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
  203. >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
  204. <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
  205. >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
  206. <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
  207. """
  208. fn = basename(_fn)
  209. primary = {}
  210. matching_lexers = set()
  211. for lexer in _iter_lexerclasses():
  212. for filename in lexer.filenames:
  213. if _fn_matches(fn, filename):
  214. matching_lexers.add(lexer)
  215. primary[lexer] = True
  216. for filename in lexer.alias_filenames:
  217. if _fn_matches(fn, filename):
  218. matching_lexers.add(lexer)
  219. primary[lexer] = False
  220. if not matching_lexers:
  221. raise ClassNotFound('no lexer for filename %r found' % fn)
  222. if len(matching_lexers) == 1:
  223. return matching_lexers.pop()(**options)
  224. result = []
  225. for lexer in matching_lexers:
  226. rv = lexer.analyse_text(_text)
  227. if rv == 1.0:
  228. return lexer(**options)
  229. result.append((rv, lexer))
  230. def type_sort(t):
  231. # sort by:
  232. # - analyse score
  233. # - is primary filename pattern?
  234. # - priority
  235. # - last resort: class name
  236. return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
  237. result.sort(key=type_sort)
  238. return result[-1][1](**options)
  239. def guess_lexer(_text, **options):
  240. """Guess a lexer by strong distinctions in the text (eg, shebang)."""
  241. if not isinstance(_text, text_type):
  242. inencoding = options.get('inencoding', options.get('encoding'))
  243. if inencoding:
  244. _text = _text.decode(inencoding or 'utf8')
  245. else:
  246. _text, _ = guess_decode(_text)
  247. # try to get a vim modeline first
  248. ft = get_filetype_from_buffer(_text)
  249. if ft is not None:
  250. try:
  251. return get_lexer_by_name(ft, **options)
  252. except ClassNotFound:
  253. pass
  254. best_lexer = [0.0, None]
  255. for lexer in _iter_lexerclasses():
  256. rv = lexer.analyse_text(_text)
  257. if rv == 1.0:
  258. return lexer(**options)
  259. if rv > best_lexer[0]:
  260. best_lexer[:] = (rv, lexer)
  261. if not best_lexer[0] or best_lexer[1] is None:
  262. raise ClassNotFound('no lexer matching the text found')
  263. return best_lexer[1](**options)
  264. class _automodule(types.ModuleType):
  265. """Automatically import lexers."""
  266. def __getattr__(self, name):
  267. info = LEXERS.get(name)
  268. if info:
  269. _load_lexers(info[0])
  270. cls = _lexer_cache[info[1]]
  271. setattr(self, name, cls)
  272. return cls
  273. if name in COMPAT:
  274. return getattr(self, COMPAT[name])
  275. raise AttributeError(name)
# Replace this module's entry in ``sys.modules`` with an ``_automodule``
# instance that carries a copy of the same namespace, so that attribute
# lookups which miss are handled by ``_automodule.__getattr__``.
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
newmod.__dict__.update(oldmod.__dict__)
sys.modules[__name__] = newmod
# Drop the helper names copied over above from the new module's namespace.
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types