data.py 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  1. """
  2. pygments.lexers.data
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexers for data file formats.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import Lexer, ExtendedRegexLexer, LexerContext, \
  9. include, bygroups
  10. from pygments.token import Comment, Error, Keyword, Literal, Name, Number, \
  11. Punctuation, String, Whitespace
  12. __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
  13. class YamlLexerContext(LexerContext):
  14. """Indentation context for the YAML lexer."""
  15. def __init__(self, *args, **kwds):
  16. super().__init__(*args, **kwds)
  17. self.indent_stack = []
  18. self.indent = -1
  19. self.next_indent = 0
  20. self.block_scalar_indent = None
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for YAML, a human-friendly data serialization
    language.

    Indentation levels are tracked in a ``YamlLexerContext`` so that
    block collections, block scalars and plain scalars can be delimited
    correctly even though the individual rules are regular expressions.
    """

    name = 'YAML'
    url = 'http://yaml.org/'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']
    version_added = '0.11'

    # The helpers below are callback *factories*: they are evaluated at
    # class-definition time and return ExtendedRegexLexer callbacks
    # that read and update the indentation state on the context.

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                # At the start of a line the matched spaces *replace*
                # the pending indentation level.
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    # Dedent: unwind the stack to the enclosing level.
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        # Leftover spaces that match no level are an error.
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                # Mid-line whitespace only extends the pending level.
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                # Explicit indicators ('-', '?', ':') count toward the
                # indentation of the node that follows them.
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            # Group 1 is the optional 1-9 indentation indicator digit.
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                # Spaces beyond the scalar's indentation are content.
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    # Dedented past the scalar: leave both the content
                    # state and the 'block-line' state under it.
                    context.stack.pop()
                    context.stack.pop()
                    return
                # First non-empty line fixes the implicit indentation.
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                # Dedent terminates the plain scalar; pop two states.
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Whitespace, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Whitespace, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Whitespace, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Whitespace, Keyword.Type, Whitespace, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Whitespace), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Whitespace)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning a block line
            (r'[ ]*', save_indent(Whitespace), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Whitespace), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Whitespace),
            # key with colon
            (r'''([^#,?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Whitespace),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Whitespace, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Whitespace)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Whitespace), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Whitespace), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar is the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Whitespace), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],
    }

    def get_tokens_unprocessed(self, text=None, context=None):
        # Supply our indentation-aware context when the caller did not.
        if context is None:
            context = YamlLexerContext(text, 0)
        return super().get_tokens_unprocessed(text, context)
class JsonLexer(Lexer):
    """
    For JSON data structures.

    Javascript-style comments are supported (like ``/* */`` and ``//``),
    though comments are not part of the JSON specification.
    This allows users to highlight JSON as it is used in the wild.

    No validation is performed on the input JSON document.

    The lexer is a hand-written, single-pass state machine rather than a
    ``RegexLexer``: boolean ``in_*`` flags record which construct the
    current character belongs to.
    """

    name = 'JSON'
    url = 'https://www.json.org'
    aliases = ['json', 'json-object']
    filenames = ['*.json', '*.jsonl', '*.ndjson', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object', 'application/x-ndjson', 'application/jsonl', 'application/json-seq']
    version_added = '1.5'

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        in_string = False
        in_escape = False
        # Number of hex digits still expected after a "\u" escape.
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False
        in_comment_single = False
        in_comment_multiline = False
        expecting_second_comment_opener = False  # // or /*
        expecting_second_comment_closer = False  # */

        # Index of the first character of the token being accumulated.
        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        # Malformed \uXXXX: abandon the escape but stay
                        # inside the string (non-validating lexer).
                        in_unicode_escape = 0
                        in_escape = False
                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False
                elif character == '\\':
                    in_escape = True
                elif character == '"':
                    # Defer the string: it may turn out to be a key.
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0
                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Whitespace, text[start:stop]))
                else:
                    yield start, Whitespace, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            elif in_comment_single:
                if character != '\n':
                    continue

                if queue:
                    queue.append((start, Comment.Single, text[start:stop]))
                else:
                    yield start, Comment.Single, text[start:stop]
                in_comment_single = False
                # Fall through so the new character can be evaluated.

            elif in_comment_multiline:
                if character == '*':
                    expecting_second_comment_closer = True
                elif expecting_second_comment_closer:
                    expecting_second_comment_closer = False
                    if character == '/':
                        if queue:
                            queue.append((start, Comment.Multiline, text[start:stop + 1]))
                        else:
                            yield start, Comment.Multiline, text[start:stop + 1]
                        in_comment_multiline = False

                continue

            elif expecting_second_comment_opener:
                expecting_second_comment_opener = False
                if character == '/':
                    in_comment_single = True
                    continue
                elif character == '*':
                    in_comment_multiline = True
                    continue

                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                # A lone '/' is not valid JSON.
                yield start, Error, text[start:stop]
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    # There can be only three types of tokens before a ':':
                    # Whitespace, Comment, or a quoted string.
                    #
                    # If it's a quoted string we emit Name.Tag.
                    # Otherwise, we yield the original token.
                    #
                    # In all other cases this would be invalid JSON,
                    # but this is not a validating JSON lexer, so it's OK.
                    if _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, _token, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            elif character == '/':
                # This is the beginning of a comment.
                expecting_second_comment_opener = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            # Unterminated string literal.
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Whitespace, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
        elif in_comment_single:
            yield start, Comment.Single, text[start:]
        elif in_comment_multiline:
            # Unterminated multiline comment.
            yield start, Error, text[start:]
        elif expecting_second_comment_opener:
            # Trailing lone '/'.
            yield start, Error, text[start:]
  612. class JsonBareObjectLexer(JsonLexer):
  613. """
  614. For JSON data structures (with missing object curly braces).
  615. .. deprecated:: 2.8.0
  616. Behaves the same as `JsonLexer` now.
  617. """
  618. name = 'JSONBareObject'
  619. aliases = []
  620. filenames = []
  621. mimetypes = []
  622. version_added = '2.2'
  623. class JsonLdLexer(JsonLexer):
  624. """
  625. For JSON-LD linked data.
  626. """
  627. name = 'JSON-LD'
  628. url = 'https://json-ld.org/'
  629. aliases = ['jsonld', 'json-ld']
  630. filenames = ['*.jsonld']
  631. mimetypes = ['application/ld+json']
  632. version_added = '2.0'
  633. json_ld_keywords = {
  634. f'"@{keyword}"'
  635. for keyword in (
  636. 'base',
  637. 'container',
  638. 'context',
  639. 'direction',
  640. 'graph',
  641. 'id',
  642. 'import',
  643. 'included',
  644. 'index',
  645. 'json',
  646. 'language',
  647. 'list',
  648. 'nest',
  649. 'none',
  650. 'prefix',
  651. 'propagate',
  652. 'protected',
  653. 'reverse',
  654. 'set',
  655. 'type',
  656. 'value',
  657. 'version',
  658. 'vocab',
  659. )
  660. }
  661. def get_tokens_unprocessed(self, text):
  662. for start, token, value in super().get_tokens_unprocessed(text):
  663. if token is Name.Tag and value in self.json_ld_keywords:
  664. yield start, Name.Decorator, value
  665. else:
  666. yield start, token, value