123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343 |
- # -*- coding: utf-8 -*-
- """
- pygments.lexers
- ~~~~~~~~~~~~~~~
- Pygments lexers.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- import sys
- import types
- import fnmatch
- from os.path import basename
- from pygments.lexers._mapping import LEXERS
- from pygments.modeline import get_filetype_from_buffer
- from pygments.plugin import find_plugin_lexers
- from pygments.util import ClassNotFound, itervalues, guess_decode, text_type
- COMPAT = {
- 'Python3Lexer': 'PythonLexer',
- 'Python3TracebackLexer': 'PythonTracebackLexer',
- }
- __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
- 'guess_lexer', 'load_lexer_from_file'] + list(LEXERS) + list(COMPAT)
- _lexer_cache = {}
- _pattern_cache = {}
- def _fn_matches(fn, glob):
- """Return whether the supplied file name fn matches pattern filename."""
- if glob not in _pattern_cache:
- pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
- return pattern.match(fn)
- return _pattern_cache[glob].match(fn)
- def _load_lexers(module_name):
- """Load a lexer (and all others in the module too)."""
- mod = __import__(module_name, None, None, ['__all__'])
- for lexer_name in mod.__all__:
- cls = getattr(mod, lexer_name)
- _lexer_cache[cls.name] = cls
- def get_all_lexers():
- """Return a generator of tuples in the form ``(name, aliases,
- filenames, mimetypes)`` of all know lexers.
- """
- for item in itervalues(LEXERS):
- yield item[1:]
- for lexer in find_plugin_lexers():
- yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes
- def find_lexer_class(name):
- """Lookup a lexer class by name.
- Return None if not found.
- """
- if name in _lexer_cache:
- return _lexer_cache[name]
- # lookup builtin lexers
- for module_name, lname, aliases, _, _ in itervalues(LEXERS):
- if name == lname:
- _load_lexers(module_name)
- return _lexer_cache[name]
- # continue with lexers from setuptools entrypoints
- for cls in find_plugin_lexers():
- if cls.name == name:
- return cls
- def find_lexer_class_by_name(_alias):
- """Lookup a lexer class by alias.
- Like `get_lexer_by_name`, but does not instantiate the class.
- .. versionadded:: 2.2
- """
- if not _alias:
- raise ClassNotFound('no lexer for alias %r found' % _alias)
- # lookup builtin lexers
- for module_name, name, aliases, _, _ in itervalues(LEXERS):
- if _alias.lower() in aliases:
- if name not in _lexer_cache:
- _load_lexers(module_name)
- return _lexer_cache[name]
- # continue with lexers from setuptools entrypoints
- for cls in find_plugin_lexers():
- if _alias.lower() in cls.aliases:
- return cls
- raise ClassNotFound('no lexer for alias %r found' % _alias)
- def get_lexer_by_name(_alias, **options):
- """Get a lexer by an alias.
- Raises ClassNotFound if not found.
- """
- if not _alias:
- raise ClassNotFound('no lexer for alias %r found' % _alias)
- # lookup builtin lexers
- for module_name, name, aliases, _, _ in itervalues(LEXERS):
- if _alias.lower() in aliases:
- if name not in _lexer_cache:
- _load_lexers(module_name)
- return _lexer_cache[name](**options)
- # continue with lexers from setuptools entrypoints
- for cls in find_plugin_lexers():
- if _alias.lower() in cls.aliases:
- return cls(**options)
- raise ClassNotFound('no lexer for alias %r found' % _alias)
- def load_lexer_from_file(filename, lexername="CustomLexer", **options):
- """Load a lexer from a file.
- This method expects a file located relative to the current working
- directory, which contains a Lexer class. By default, it expects the
- Lexer to be name CustomLexer; you can specify your own class name
- as the second argument to this function.
- Users should be very careful with the input, because this method
- is equivalent to running eval on the input file.
- Raises ClassNotFound if there are any problems importing the Lexer.
- .. versionadded:: 2.2
- """
- try:
- # This empty dict will contain the namespace for the exec'd file
- custom_namespace = {}
- with open(filename, 'rb') as f:
- exec(f.read(), custom_namespace)
- # Retrieve the class `lexername` from that namespace
- if lexername not in custom_namespace:
- raise ClassNotFound('no valid %s class found in %s' %
- (lexername, filename))
- lexer_class = custom_namespace[lexername]
- # And finally instantiate it with the options
- return lexer_class(**options)
- except IOError as err:
- raise ClassNotFound('cannot read %s: %s' % (filename, err))
- except ClassNotFound:
- raise
- except Exception as err:
- raise ClassNotFound('error when loading custom lexer: %s' % err)
- def find_lexer_class_for_filename(_fn, code=None):
- """Get a lexer for a filename.
- If multiple lexers match the filename pattern, use ``analyse_text()`` to
- figure out which one is more appropriate.
- Returns None if not found.
- """
- matches = []
- fn = basename(_fn)
- for modname, name, _, filenames, _ in itervalues(LEXERS):
- for filename in filenames:
- if _fn_matches(fn, filename):
- if name not in _lexer_cache:
- _load_lexers(modname)
- matches.append((_lexer_cache[name], filename))
- for cls in find_plugin_lexers():
- for filename in cls.filenames:
- if _fn_matches(fn, filename):
- matches.append((cls, filename))
- if sys.version_info > (3,) and isinstance(code, bytes):
- # decode it, since all analyse_text functions expect unicode
- code = guess_decode(code)
- def get_rating(info):
- cls, filename = info
- # explicit patterns get a bonus
- bonus = '*' not in filename and 0.5 or 0
- # The class _always_ defines analyse_text because it's included in
- # the Lexer class. The default implementation returns None which
- # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py
- # to find lexers which need it overridden.
- if code:
- return cls.analyse_text(code) + bonus, cls.__name__
- return cls.priority + bonus, cls.__name__
- if matches:
- matches.sort(key=get_rating)
- # print "Possible lexers, after sort:", matches
- return matches[-1][0]
- def get_lexer_for_filename(_fn, code=None, **options):
- """Get a lexer for a filename.
- If multiple lexers match the filename pattern, use ``analyse_text()`` to
- figure out which one is more appropriate.
- Raises ClassNotFound if not found.
- """
- res = find_lexer_class_for_filename(_fn, code)
- if not res:
- raise ClassNotFound('no lexer for filename %r found' % _fn)
- return res(**options)
- def get_lexer_for_mimetype(_mime, **options):
- """Get a lexer for a mimetype.
- Raises ClassNotFound if not found.
- """
- for modname, name, _, _, mimetypes in itervalues(LEXERS):
- if _mime in mimetypes:
- if name not in _lexer_cache:
- _load_lexers(modname)
- return _lexer_cache[name](**options)
- for cls in find_plugin_lexers():
- if _mime in cls.mimetypes:
- return cls(**options)
- raise ClassNotFound('no lexer for mimetype %r found' % _mime)
- def _iter_lexerclasses(plugins=True):
- """Return an iterator over all lexer classes."""
- for key in sorted(LEXERS):
- module_name, name = LEXERS[key][:2]
- if name not in _lexer_cache:
- _load_lexers(module_name)
- yield _lexer_cache[name]
- if plugins:
- for lexer in find_plugin_lexers():
- yield lexer
- def guess_lexer_for_filename(_fn, _text, **options):
- """
- Lookup all lexers that handle those filenames primary (``filenames``)
- or secondary (``alias_filenames``). Then run a text analysis for those
- lexers and choose the best result.
- usage::
- >>> from pygments.lexers import guess_lexer_for_filename
- >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
- <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
- >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
- <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
- >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
- <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
- """
- fn = basename(_fn)
- primary = {}
- matching_lexers = set()
- for lexer in _iter_lexerclasses():
- for filename in lexer.filenames:
- if _fn_matches(fn, filename):
- matching_lexers.add(lexer)
- primary[lexer] = True
- for filename in lexer.alias_filenames:
- if _fn_matches(fn, filename):
- matching_lexers.add(lexer)
- primary[lexer] = False
- if not matching_lexers:
- raise ClassNotFound('no lexer for filename %r found' % fn)
- if len(matching_lexers) == 1:
- return matching_lexers.pop()(**options)
- result = []
- for lexer in matching_lexers:
- rv = lexer.analyse_text(_text)
- if rv == 1.0:
- return lexer(**options)
- result.append((rv, lexer))
- def type_sort(t):
- # sort by:
- # - analyse score
- # - is primary filename pattern?
- # - priority
- # - last resort: class name
- return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
- result.sort(key=type_sort)
- return result[-1][1](**options)
- def guess_lexer(_text, **options):
- """Guess a lexer by strong distinctions in the text (eg, shebang)."""
- if not isinstance(_text, text_type):
- inencoding = options.get('inencoding', options.get('encoding'))
- if inencoding:
- _text = _text.decode(inencoding or 'utf8')
- else:
- _text, _ = guess_decode(_text)
- # try to get a vim modeline first
- ft = get_filetype_from_buffer(_text)
- if ft is not None:
- try:
- return get_lexer_by_name(ft, **options)
- except ClassNotFound:
- pass
- best_lexer = [0.0, None]
- for lexer in _iter_lexerclasses():
- rv = lexer.analyse_text(_text)
- if rv == 1.0:
- return lexer(**options)
- if rv > best_lexer[0]:
- best_lexer[:] = (rv, lexer)
- if not best_lexer[0] or best_lexer[1] is None:
- raise ClassNotFound('no lexer matching the text found')
- return best_lexer[1](**options)
- class _automodule(types.ModuleType):
- """Automatically import lexers."""
- def __getattr__(self, name):
- info = LEXERS.get(name)
- if info:
- _load_lexers(info[0])
- cls = _lexer_cache[info[1]]
- setattr(self, name, cls)
- return cls
- if name in COMPAT:
- return getattr(self, COMPAT[name])
- raise AttributeError(name)
- oldmod = sys.modules[__name__]
- newmod = _automodule(__name__)
- newmod.__dict__.update(oldmod.__dict__)
- sys.modules[__name__] = newmod
- del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types
|