123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
- # -*- coding: utf-8 -*-
- """
- pygments.lexers.rdf
- ~~~~~~~~~~~~~~~~~~~
- Lexers for semantic web and RDF query languages and markup.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import RegexLexer, bygroups, default
- from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \
- Whitespace, Name, Literal, Comment, Text
- __all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer']
- class SparqlLexer(RegexLexer):
- """
- Lexer for `SPARQL <http://www.w3.org/TR/rdf-sparql-query/>`_ query language.
- .. versionadded:: 2.0
- """
- name = 'SPARQL'
- aliases = ['sparql']
- filenames = ['*.rq', '*.sparql']
- mimetypes = ['application/sparql-query']
- # character group definitions ::
- PN_CHARS_BASE_GRP = (u'a-zA-Z'
- u'\u00c0-\u00d6'
- u'\u00d8-\u00f6'
- u'\u00f8-\u02ff'
- u'\u0370-\u037d'
- u'\u037f-\u1fff'
- u'\u200c-\u200d'
- u'\u2070-\u218f'
- u'\u2c00-\u2fef'
- u'\u3001-\ud7ff'
- u'\uf900-\ufdcf'
- u'\ufdf0-\ufffd')
- PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
- PN_CHARS_GRP = (PN_CHARS_U_GRP +
- r'\-' +
- r'0-9' +
- u'\u00b7' +
- u'\u0300-\u036f' +
- u'\u203f-\u2040')
- HEX_GRP = '0-9A-Fa-f'
- PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%'
- # terminal productions ::
- PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'
- PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'
- PN_CHARS = '[' + PN_CHARS_GRP + ']'
- HEX = '[' + HEX_GRP + ']'
- PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'
- IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'
- BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
- '.]*' + PN_CHARS + ')?'
- PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'
- VARNAME = u'[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
- u'0-9\u00b7\u0300-\u036f\u203f-\u2040]*'
- PERCENT = '%' + HEX + HEX
- PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS
- PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
- PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
- '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
- PN_CHARS_GRP + ':]|' + PLX + '))?')
- EXPONENT = r'[eE][+-]?\d+'
- # Lexer token definitions ::
- tokens = {
- 'root': [
- (r'\s+', Text),
- # keywords ::
- (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
- r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
- r'offset|bindings|load|clear|drop|create|add|move|copy|'
- r'insert\s+data|delete\s+data|delete\s+where|delete|insert|'
- r'using\s+named|using|graph|default|named|all|optional|service|'
- r'silent|bind|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
- (r'(a)\b', Keyword),
- # IRIs ::
- ('(' + IRIREF + ')', Name.Label),
- # blank nodes ::
- ('(' + BLANK_NODE_LABEL + ')', Name.Label),
- # # variables ::
- ('[?$]' + VARNAME, Name.Variable),
- # prefixed names ::
- (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
- bygroups(Name.Namespace, Punctuation, Name.Tag)),
- # function names ::
- (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
- r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
- r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
- r'hours|minutes|seconds|timezone|tz|now|md5|sha1|sha256|sha384|'
- r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
- r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
- r'count|sum|min|max|avg|sample|group_concat|separator)\b',
- Name.Function),
- # boolean literals ::
- (r'(true|false)', Keyword.Constant),
- # double literals ::
- (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
- # decimal literals ::
- (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
- # integer literals ::
- (r'[+\-]?\d+', Number.Integer),
- # operators ::
- (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
- # punctuation characters ::
- (r'[(){}.;,:^\[\]]', Punctuation),
- # line comments ::
- (r'#[^\n]*', Comment),
- # strings ::
- (r'"""', String, 'triple-double-quoted-string'),
- (r'"', String, 'single-double-quoted-string'),
- (r"'''", String, 'triple-single-quoted-string'),
- (r"'", String, 'single-single-quoted-string'),
- ],
- 'triple-double-quoted-string': [
- (r'"""', String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'single-double-quoted-string': [
- (r'"', String, 'end-of-string'),
- (r'[^"\\\n]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'triple-single-quoted-string': [
- (r"'''", String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String.Escape, 'string-escape'),
- ],
- 'single-single-quoted-string': [
- (r"'", String, 'end-of-string'),
- (r"[^'\\\n]+", String),
- (r'\\', String, 'string-escape'),
- ],
- 'string-escape': [
- (r'u' + HEX + '{4}', String.Escape, '#pop'),
- (r'U' + HEX + '{8}', String.Escape, '#pop'),
- (r'.', String.Escape, '#pop'),
- ],
- 'end-of-string': [
- (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
- bygroups(Operator, Name.Function), '#pop:2'),
- (r'\^\^', Operator, '#pop:2'),
- default('#pop:2'),
- ],
- }
- class TurtleLexer(RegexLexer):
- """
- Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.
- .. versionadded:: 2.1
- """
- name = 'Turtle'
- aliases = ['turtle']
- filenames = ['*.ttl']
- mimetypes = ['text/turtle', 'application/x-turtle']
- flags = re.IGNORECASE
- patterns = {
- 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range
- 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
- }
- # PNAME_NS PN_LOCAL (with simplified character range)
- patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns
- tokens = {
- 'root': [
- (r'\s+', Whitespace),
- # Base / prefix
- (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
- bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
- Punctuation)),
- (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
- bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
- Name.Variable, Whitespace, Punctuation)),
- # The shorthand predicate 'a'
- (r'(?<=\s)a(?=\s)', Keyword.Type),
- # IRIREF
- (r'%(IRIREF)s' % patterns, Name.Variable),
- # PrefixedName
- (r'%(PrefixedName)s' % patterns,
- bygroups(Name.Namespace, Name.Tag)),
- # Comment
- (r'#[^\n]+', Comment),
- (r'\b(true|false)\b', Literal),
- (r'[+\-]?\d*\.\d+', Number.Float),
- (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float),
- (r'[+\-]?\d+', Number.Integer),
- (r'[\[\](){}.;,:^]', Punctuation),
- (r'"""', String, 'triple-double-quoted-string'),
- (r'"', String, 'single-double-quoted-string'),
- (r"'''", String, 'triple-single-quoted-string'),
- (r"'", String, 'single-single-quoted-string'),
- ],
- 'triple-double-quoted-string': [
- (r'"""', String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'single-double-quoted-string': [
- (r'"', String, 'end-of-string'),
- (r'[^"\\\n]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'triple-single-quoted-string': [
- (r"'''", String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'single-single-quoted-string': [
- (r"'", String, 'end-of-string'),
- (r"[^'\\\n]+", String),
- (r'\\', String, 'string-escape'),
- ],
- 'string-escape': [
- (r'.', String, '#pop'),
- ],
- 'end-of-string': [
- (r'(@)([a-z]+(:?-[a-z0-9]+)*)',
- bygroups(Operator, Generic.Emph), '#pop:2'),
- (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
- (r'(\^\^)%(PrefixedName)s' % patterns,
- bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),
- default('#pop:2'),
- ],
- }
- # Turtle and Tera Term macro files share the same file extension
- # but each has a recognizable and distinct syntax.
- def analyse_text(text):
- for t in ('@base ', 'BASE ', '@prefix ', 'PREFIX '):
- if re.search(r'^\s*%s' % t, text):
- return 0.80
- class ShExCLexer(RegexLexer):
- """
- Lexer for `ShExC <https://shex.io/shex-semantics/#shexc>`_ shape expressions language syntax.
- """
- name = 'ShExC'
- aliases = ['shexc', 'shex']
- filenames = ['*.shex']
- mimetypes = ['text/shex']
- # character group definitions ::
- PN_CHARS_BASE_GRP = (u'a-zA-Z'
- u'\u00c0-\u00d6'
- u'\u00d8-\u00f6'
- u'\u00f8-\u02ff'
- u'\u0370-\u037d'
- u'\u037f-\u1fff'
- u'\u200c-\u200d'
- u'\u2070-\u218f'
- u'\u2c00-\u2fef'
- u'\u3001-\ud7ff'
- u'\uf900-\ufdcf'
- u'\ufdf0-\ufffd')
- PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
- PN_CHARS_GRP = (PN_CHARS_U_GRP +
- r'\-' +
- r'0-9' +
- u'\u00b7' +
- u'\u0300-\u036f' +
- u'\u203f-\u2040')
- HEX_GRP = '0-9A-Fa-f'
- PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"
- # terminal productions ::
- PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'
- PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'
- PN_CHARS = '[' + PN_CHARS_GRP + ']'
- HEX = '[' + HEX_GRP + ']'
- PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'
- UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'
- UCHAR = r'\\' + UCHAR_NO_BACKSLASH
- IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'
- BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
- '.]*' + PN_CHARS + ')?'
- PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'
- PERCENT = '%' + HEX + HEX
- PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS
- PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
- PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
- '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
- PN_CHARS_GRP + ':]|' + PLX + '))?')
- EXPONENT = r'[eE][+-]?\d+'
- # Lexer token definitions ::
- tokens = {
- 'root': [
- (r'\s+', Text),
- # keywords ::
- (r'(?i)(base|prefix|start|external|'
- r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
- r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
- r'totaldigits|fractiondigits|'
- r'closed|extra)\b', Keyword),
- (r'(a)\b', Keyword),
- # IRIs ::
- ('(' + IRIREF + ')', Name.Label),
- # blank nodes ::
- ('(' + BLANK_NODE_LABEL + ')', Name.Label),
- # prefixed names ::
- (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
- bygroups(Name.Namespace, Punctuation, Name.Tag)),
- # boolean literals ::
- (r'(true|false)', Keyword.Constant),
- # double literals ::
- (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
- # decimal literals ::
- (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
- # integer literals ::
- (r'[+\-]?\d+', Number.Integer),
- # operators ::
- (r'[@|$&=*+?^\-~]', Operator),
- # operator keywords ::
- (r'(?i)(and|or|not)\b', Operator.Word),
- # punctuation characters ::
- (r'[(){}.;,:^\[\]]', Punctuation),
- # line comments ::
- (r'#[^\n]*', Comment),
- # strings ::
- (r'"""', String, 'triple-double-quoted-string'),
- (r'"', String, 'single-double-quoted-string'),
- (r"'''", String, 'triple-single-quoted-string'),
- (r"'", String, 'single-single-quoted-string'),
- ],
- 'triple-double-quoted-string': [
- (r'"""', String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'single-double-quoted-string': [
- (r'"', String, 'end-of-string'),
- (r'[^"\\\n]+', String),
- (r'\\', String, 'string-escape'),
- ],
- 'triple-single-quoted-string': [
- (r"'''", String, 'end-of-string'),
- (r'[^\\]+', String),
- (r'\\', String.Escape, 'string-escape'),
- ],
- 'single-single-quoted-string': [
- (r"'", String, 'end-of-string'),
- (r"[^'\\\n]+", String),
- (r'\\', String, 'string-escape'),
- ],
- 'string-escape': [
- (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
- (r'.', String.Escape, '#pop'),
- ],
- 'end-of-string': [
- (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
- bygroups(Operator, Name.Function), '#pop:2'),
- (r'\^\^', Operator, '#pop:2'),
- default('#pop:2'),
- ],
- }
|