123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561 |
- # -*- coding: utf-8 -*-
- """
- pygments.lexers.data
- ~~~~~~~~~~~~~~~~~~~~
- Lexers for data file format.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
- include, bygroups, inherit
- from pygments.token import Text, Comment, Keyword, Name, String, Number, \
- Punctuation, Literal, Error
- __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
class YamlLexerContext(LexerContext):
    """Lexer context extended with the indentation state used for YAML."""

    def __init__(self, *args, **kwargs):
        super(YamlLexerContext, self).__init__(*args, **kwargs)
        # explicit or detected indentation of the current block scalar;
        # ``None`` until one has been established
        self.block_scalar_indent = None
        # candidate indentation level, recorded while spaces are consumed
        self.next_indent = 0
        # indentation level currently in effect (-1: none set yet)
        self.indent = -1
        # enclosing indentation levels, innermost last
        self.indent_stack = []
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    The helper functions defined in the class body are callback factories:
    each returns a rule action that emits tokens and maintains the
    indentation state stored on the lexer context.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Emit the match as `token_class`, unless the match is empty."""
        def callback(lexer, match, context):
            matched = match.group()
            if matched:
                yield match.start(), token_class, matched
                context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Emit the match and return all indentation state to its defaults."""
        def callback(lexer, match, context):
            matched = match.group()
            context.block_scalar_indent = None
            context.next_indent = 0
            context.indent = -1
            context.indent_stack = []
            yield match.start(), token_class, matched
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Record a candidate indentation level from the matched spaces.

        With `start` set, the match is the leading whitespace of a line:
        the candidate level becomes its length and every enclosing level
        deeper than it is popped.  If the line then sits deeper than the
        level that was restored, the surplus spaces are emitted as
        ``token_class.Error``.  Without `start`, the match length is simply
        added to the candidate level.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            surplus = ''
            if not start:
                context.next_indent += len(spaces)
            else:
                context.next_indent = len(spaces)
                dedented = context.next_indent < context.indent
                while context.next_indent < context.indent:
                    context.indent = context.indent_stack.pop()
                # only a dedent can leave the line between two known levels
                if dedented and context.next_indent > context.indent:
                    cut = context.indent
                    spaces, surplus = spaces[:cut], spaces[cut:]
            begin = match.start()
            if spaces:
                yield begin, token_class, spaces
            if surplus:
                yield begin + len(spaces), token_class.Error, surplus
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Promote the candidate indentation level to the current one.

        The previous level is pushed on the stack when the candidate is
        deeper.  Unless `implicit` is set, the matched text also counts
        towards the next candidate level.
        """
        def callback(lexer, match, context):
            matched = match.group()
            if context.next_indent > context.indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(matched)
            yield match.start(), token_class, matched
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Handle a block scalar header and its optional indentation digit.

        The block scalar indentation is always cleared first; when group 1
        of the match holds a digit, it is set to that digit added to the
        current indentation (taken as 0 if negative).  An empty match
        produces no token.
        """
        def callback(lexer, match, context):
            header = match.group()
            context.block_scalar_indent = None
            if not header:
                return
            digit = match.group(1)
            if digit:
                base = max(context.indent, 0)
                context.block_scalar_indent = base + int(digit)
            yield match.start(), token_class, header
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Tokenize a spaces-only line found inside a block scalar.

        Spaces up to the block scalar indentation are emitted as
        `indent_token_class`; anything beyond it is emitted as
        `content_token_class`.
        """
        def callback(lexer, match, context):
            line = match.group()
            begin = match.start()
            limit = context.block_scalar_indent
            if limit is not None and len(line) > limit:
                yield begin, indent_token_class, line[:limit]
                yield begin + limit, content_token_class, line[limit:]
            elif line:
                yield begin, indent_token_class, line
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Tokenize the leading spaces of a block scalar line.

        If no block scalar indentation is known yet, the first line that is
        indented deeper than the current level defines it.  A line that is
        not indented far enough ends the scalar: two states are popped and
        nothing is consumed.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            width = len(spaces)
            expected = context.block_scalar_indent
            if expected is None:
                scalar_ended = width <= max(context.indent, 0)
            else:
                scalar_ended = width < expected
            if scalar_ended:
                context.stack.pop()
                context.stack.pop()
                return
            if expected is None:
                context.block_scalar_indent = width
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Tokenize the leading spaces of a plain scalar continuation line.

        A line that is not indented deeper than the current level ends the
        scalar: two states are popped and nothing is consumed.
        """
        def callback(lexer, match, context):
            spaces = match.group()
            if len(spaces) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
        return callback

    tokens = {
        # entry state: one rule per kind of line start
        'root': [
            # spaces that lead only to a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # one or more line breaks
            (r'\n+', Text),
            # comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # '---' (document start) and '...' (document end) markers
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # leading indentation of a line that has content
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # remainder of a line after a directive or a block scalar header
        'ignored-line': [
            # spaces that lead only to a comment or the end of the line
            (r'[ ]+(?=#|$)', Text),
            # comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # the line break also leaves the state that pushed this one
            (r'\n', Text, '#pop:2'),
        ],

        # arguments of the '%YAML' directive
        'yaml-directive': [
            # version number, e.g. ' 1.1'
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # arguments of the '%TAG' directive
        'tag-directive': [
            # tag handle followed by its prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # indentation spaces and block collection indicators at line start
        'indentation': [
            # nothing but spaces remain on the line
            (r'[ ]*$', something(Text), '#pop:2'),
            # spaces in front of a '?', ':' or '-' indicator
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # the '?', ':' or '-' indicator itself
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # remaining spaces; the line content starts after them
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # content of an indented line in block context
        'block-line': [
            # end of the line (possibly followed by a comment)
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # spaces between tokens
            (r'[ ]+', Text),
            # mapping key immediately followed by ':'
            (r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and block scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # node properties: tags, anchors and aliases
        'descriptors': [
            # verbatim tag: '!<...>'
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # anchor: '&name'
            (r'&[\w-]+', Name.Label),
            # alias: '*name'
            (r'\*[\w-]+', Name.Variable),
        ],

        # starts of block collections and block scalars
        'block-nodes': [
            # ':' value indicator of an implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # '|' (literal) and '>' (folded) scalar indicators
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # starts of flow collections and quoted scalars
        'flow-nodes': [
            # '[' opens a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # '{' opens a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # opening quote of a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # opening quote of a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # rules shared by flow sequences and flow mappings
        'flow-collection': [
            # spaces
            (r'[ ]+', Text),
            # one or more line breaks
            (r'\n+', Text),
            # comment running to the end of the line
            (r'#[^\n]*', Comment.Single),
            # '?', ':' and ',' indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested flow collections and quoted scalars
            include('flow-nodes'),
            # anything else starts a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # flow sequence: '[' ... ']'
        'flow-sequence': [
            # shared flow collection rules
            include('flow-collection'),
            # closing bracket
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # flow mapping: '{' ... '}'
        'flow-mapping': [
            # mapping key immediately followed by ':'
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # shared flow collection rules
            include('flow-collection'),
            # closing brace
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # lines belonging to a literal or folded scalar
        'block-scalar-content': [
            # a single line break
            (r'\n', Text),
            # line consisting of spaces only
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # leading spaces; the callback may leave this state
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # the text of the line
            (r'[\S\t ]+', Name.Constant),
        ],

        # header following the '|' or '>' indicator
        'block-scalar-header': [
            # indentation digit, then chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag, then indentation digit
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # whitespace handling shared by both kinds of quoted scalar
        'quoted-scalar-whitespaces': [
            # spaces at the start or the end of a line are not content
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # neither are line breaks
            (r'\n+', Text),
            # spaces in the middle of a line belong to the value
            (r'[ ]+', Name.Variable),
        ],

        # body of a single-quoted scalar
        'single-quoted-scalar': [
            # shared whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # '' stands for a literal quote character
            (r'\'\'', String.Escape),
            # run of ordinary characters
            (r'[^\s\']+', String),
            # closing quote
            (r'\'', String, '#pop'),
        ],

        # body of a double-quoted scalar
        'double-quoted-scalar': [
            # shared whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # backslash followed by a single special character
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # \xXX, \uXXXX and \UXXXXXXXX escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # run of ordinary characters
            (r'[^\s"\\]+', String),
            # closing quote
            (r'"', String, '#pop'),
        ],

        # start of a new line reached while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # line consisting of spaces only
            (r'^[ ]+$', Text),
            # one or more line breaks
            (r'\n+', Text),
            # a document marker ends the scalar and the block line
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # leading spaces; the callback may leave the block line state
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # plain scalar in block context
        'plain-scalar-in-block-context': [
            # a ':' indicator ends the scalar
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # spaces followed by a comment end the scalar
            (r'[ ]+(?=#)', Text, '#pop'),
            # spaces at the end of a line are not content
            (r'[ ]+$', Text),
            # a line break hands over to the new-line state
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # spaces in the middle of a line belong to the value
            (r'[ ]+', Literal.Scalar.Plain),
            # run of ordinary characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # plain scalar in flow context
        'plain-scalar-in-flow-context': [
            # an indicator character ends the scalar
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # spaces followed by a comment end the scalar
            (r'[ ]+(?=#)', Text, '#pop'),
            # spaces at the start or the end of a line are not content
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # neither are line breaks
            (r'\n+', Text),
            # spaces in the middle of a line belong to the value
            (r'[ ]+', Name.Variable),
            # run of ordinary characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],
    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize `text`, creating a `YamlLexerContext` if none is given."""
        ctx = YamlLexerContext(text, 0) if context is None else context
        return super(YamlLexer, self).get_tokens_unprocessed(text, ctx)
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    # DOTALL makes ``.`` match a newline as well; ``string_part`` below
    # relies on it so that a backslash may be followed by any character.
    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    # a double-quoted string.  Every character is either a backslash, which
    # always consumes the character after it, or anything except a backslash
    # and a quote.  The two alternatives never overlap, so a failed match
    # (e.g. an unterminated string containing a long run of backslashes)
    # cannot make the regex engine backtrack exponentially.
    string_part = r'"(\\.|[^\\"])*"'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # floats must be tried before integers, which are their prefix
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            (string_part, String.Double),
        ],

        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing bracket terminates the entire object, so pop twice
            (r'\}', Punctuation, '#pop:2'),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            (string_part, Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value
        # (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    Lexing starts inside an object body, so a closing brace has no opening
    counterpart and is reported as an error.

    .. versionadded:: 2.2
    """

    name = 'JSONBareObject'
    aliases = ['json-object']
    filenames = []
    mimetypes = ['application/json-object']

    tokens = {
        # start directly in the body of an object
        'root': [
            (r'\}', Error),
            include('objectvalue'),
        ],
        # same as the parent's rules, except that '}' is flagged first
        'objectattribute': [
            (r'\}', Error),
            inherit,
        ],
    }
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <http://json-ld.org/>`_ linked data.

    Behaves like the JSON lexer, except that the ``@``-prefixed keywords
    listed below are highlighted as decorators when used as object keys.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    tokens = {
        'objectvalue': [
            # quoted '@keyword' used as an attribute name
            (r'"@(%s)"' % '|'.join((
                'context', 'id', 'value', 'language', 'type', 'container',
                'list', 'set', 'reverse', 'index', 'base', 'vocab', 'graph',
            )), Name.Decorator, 'objectattribute'),
            # then every rule of the parent state
            inherit,
        ],
    }
|