data.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.data
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Lexers for data file format.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
  11. include, bygroups, inherit
  12. from pygments.token import Text, Comment, Keyword, Name, String, Number, \
  13. Punctuation, Literal, Error
  14. __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
  15. class YamlLexerContext(LexerContext):
  16. """Indentation context for the YAML lexer."""
  17. def __init__(self, *args, **kwds):
  18. super(YamlLexerContext, self).__init__(*args, **kwds)
  19. self.indent_stack = []
  20. self.indent = -1
  21. self.next_indent = 0
  22. self.block_scalar_indent = None
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    The helper functions defined directly in the class body (``something``,
    ``reset_indent``, ``save_indent``, ...) take no ``self``: they are callback
    factories that are called while the ``tokens`` table below is being built.
    Each returned callback receives ``(lexer, match, context)``, yields
    ``(position, token type, text)`` tuples and advances ``context.pos``
    itself.  The indentation state they read and write lives on the context
    object (``indent``, ``next_indent``, ``indent_stack``,
    ``block_scalar_indent``).

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Do not produce empty tokens.

        The returned callback emits the matched text as `token_class`; for an
        empty match it emits nothing and leaves ``context.pos`` unassigned.
        """
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels.

        The returned callback restores all four indentation attributes of the
        context to their initial values and emits the match as `token_class`.
        """
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level.

        With ``start=True`` the match is taken as the leading spaces of a
        line: ``next_indent`` is set to its length and saved levels are popped
        while the line is less indented than the current level.  Otherwise the
        length of the match is added to ``next_indent``.
        """
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    # dedent: unwind to the closest enclosing level
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    # the line is deeper than the level we unwound to: the
                    # surplus spaces are split off and emitted separately
                    if context.next_indent > context.indent:
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                # surplus indentation is reported as the ``Error`` subtype of
                # the given token class
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level.

        If ``next_indent`` is deeper than the current ``indent``, the current
        level is pushed on ``indent_stack`` and ``next_indent`` becomes the
        current level.  Unless `implicit` is true, the length of the match is
        then added to ``next_indent``.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar.

        Group 1 of the match is the optional indentation digit; when present,
        ``block_scalar_indent`` becomes the current indent (at least 0) plus
        that digit, otherwise it stays ``None``.
        """
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                # empty header: nothing to emit, position is not advanced
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar.

        Spaces up to ``block_scalar_indent`` are emitted as
        `indent_token_class`; any spaces beyond it are emitted as
        `content_token_class`.
        """
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                # indentation unknown yet, or the line does not exceed it:
                # the whole line is indentation
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar.

        When the line is not indented enough to belong to the scalar, two
        entries are popped off ``context.stack`` and nothing is emitted; the
        position is left unchanged in that case.
        """
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                # no indentation fixed yet: the line must be deeper than the
                # current indent, and then defines the scalar's indentation
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar.

        A line indented no deeper than the current ``indent`` pops two entries
        off ``context.stack`` and emits nothing.
        """
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # key with colon
            (r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header of a literal or folded scalar (follows '|' or '>')
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],
    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize `text`, creating a `YamlLexerContext` starting at
        position 0 when the caller does not supply a context."""
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
  395. class JsonLexer(RegexLexer):
  396. """
  397. For JSON data structures.
  398. .. versionadded:: 1.5
  399. """
  400. name = 'JSON'
  401. aliases = ['json']
  402. filenames = ['*.json']
  403. mimetypes = ['application/json']
  404. flags = re.DOTALL
  405. # integer part of a number
  406. int_part = r'-?(0|[1-9]\d*)'
  407. # fractional part of a number
  408. frac_part = r'\.\d+'
  409. # exponential part of a number
  410. exp_part = r'[eE](\+|-)?\d+'
  411. tokens = {
  412. 'whitespace': [
  413. (r'\s+', Text),
  414. ],
  415. # represents a simple terminal value
  416. 'simplevalue': [
  417. (r'(true|false|null)\b', Keyword.Constant),
  418. (('%(int_part)s(%(frac_part)s%(exp_part)s|'
  419. '%(exp_part)s|%(frac_part)s)') % vars(),
  420. Number.Float),
  421. (int_part, Number.Integer),
  422. (r'"(\\\\|\\"|[^"])*"', String.Double),
  423. ],
  424. # the right hand side of an object, after the attribute name
  425. 'objectattribute': [
  426. include('value'),
  427. (r':', Punctuation),
  428. # comma terminates the attribute but expects more
  429. (r',', Punctuation, '#pop'),
  430. # a closing bracket terminates the entire object, so pop twice
  431. (r'\}', Punctuation, '#pop:2'),
  432. ],
  433. # a json object - { attr, attr, ... }
  434. 'objectvalue': [
  435. include('whitespace'),
  436. (r'"(\\\\|\\"|[^"])*"', Name.Tag, 'objectattribute'),
  437. (r'\}', Punctuation, '#pop'),
  438. ],
  439. # json array - [ value, value, ... }
  440. 'arrayvalue': [
  441. include('whitespace'),
  442. include('value'),
  443. (r',', Punctuation),
  444. (r'\]', Punctuation, '#pop'),
  445. ],
  446. # a json value - either a simple value or a complex value (object or array)
  447. 'value': [
  448. include('whitespace'),
  449. include('simplevalue'),
  450. (r'\{', Punctuation, 'objectvalue'),
  451. (r'\[', Punctuation, 'arrayvalue'),
  452. ],
  453. # the root of a json document whould be a value
  454. 'root': [
  455. include('value'),
  456. ],
  457. }
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    Only the two states listed in ``tokens`` are overridden here; the class
    otherwise derives from `JsonLexer`.

    .. versionadded:: 2.2
    """

    name = 'JSONBareObject'
    aliases = ['json-object']
    # no file name patterns are registered for this lexer
    filenames = []
    mimetypes = ['application/json-object']

    tokens = {
        'root': [
            # the outer object has no braces, so a '}' at this level is
            # flagged as an error; this rule is listed before the included
            # 'objectvalue' rules
            (r'\}', Error),
            # the document itself is lexed as the inside of an object
            include('objectvalue'),
        ],
        'objectattribute': [
            # same error rule, listed ahead of the inherited rules
            (r'\}', Error),
            # NOTE(review): `inherit` is the Pygments marker that presumably
            # splices in the parent's 'objectattribute' rules at this
            # position - confirm against the Pygments lexer documentation
            inherit,
        ],
    }
  477. class JsonLdLexer(JsonLexer):
  478. """
  479. For `JSON-LD <http://json-ld.org/>`_ linked data.
  480. .. versionadded:: 2.0
  481. """
  482. name = 'JSON-LD'
  483. aliases = ['jsonld', 'json-ld']
  484. filenames = ['*.jsonld']
  485. mimetypes = ['application/ld+json']
  486. tokens = {
  487. 'objectvalue': [
  488. (r'"@(context|id|value|language|type|container|list|set|'
  489. r'reverse|index|base|vocab|graph)"', Name.Decorator,
  490. 'objectattribute'),
  491. inherit,
  492. ],
  493. }